Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FRECPE (scalar, S)

Test 1: uops

Code:

  frecpe s0, s0
  movi v0.16b, 1
  movi v1.16b, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 07 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
1004303722024611900251000100010001044513018303730372738328951000100010003037303711100110000073116112919100030383038303830383038
100430372300611900251000100010001044513018308430372738328951000100010003037303711100110000073116112919100030383038303830383038
1004303722012611900251000100010001044513018303730372738328951000100010003037303711100110000075116112919100030383038303830383038
100430372300611900251000100010001044513018303730372738328951000100010003037303711100110000073216112919100030383038303830383038
1004303723001031900251000100010001044513018303730372737328951000100010003037303711100110000073216112918100030383038303830383038
100430372310611900251000100010001044513018303730372738328951000100010003037303711100110000073116112919100030383038303830383038
100430372300611900251000100010001044513018303730372755328951000100010003037303711100110000073116112919100030383038303830383038
100430372300611900251000100010001044513018303730372738328951000100010003037303711100110000073116122919100030383038303830383038
100430372300611900251000100010001044513018303730372738328951000100010003037303711100110000073116112919100030383038303830383038
100430372300611900251000100010001044513018303730372738328951000100010003037303711100110000073116112919100030383038303830383038

Test 2: Latency 1->2

Code:

  frecpe s0, s0
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 09 | 18 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020430037225000006119900251010010010000100100005001067451130018300373003728588328745101002001000020010000300373003711102011009910010010000100020971011611299190100001003003830038300383003830038
1020430037225000006119900251010010010000100100005001067451130018300373003728588328745101002001000020010000300373003711102011009910010010000100010071011611299190100001003003830038300383003830038
10204300372250000061199002510100100100001001000050010674511300183003730037285883287451010020010000200100003003730037111020110099100100100001000500757325212991917100001003013330178301333027230178
102043017922500330611988425101251251000010010126618106857813055830845308922897621287451010020010000200102223013130037111020110099100100100001002121871011611299540100001003003830038300383003830038
1020430085225000006119900251014710010000100100005001067451130018300373003728603328745101432001011320010000300373003731102011009910010010000100030071011611299190100001003003830038300383003830038
10204300372250000216119900251010010010000100101265001067451130018300373003728588328745101002001000020010000300373003711102011009910010010000100010071011611299190100001003003830038300383022830038
1020430037225000006119900251010010010000100100005001067451130018300373003728588328745101002001000020010000300373022611102011009910010010000100000071011611299190100001003003830038300383003830038
10204300372250000126119900251010010010000100100005001067451130018301333003728588328745101002001000020010000300373003711102011009910010010000100060071011611299190100001003003830038300383003830038
1020430037225000006119900251010010010000100100005001067451130018300373003728588328745101002001000020010000300373003711102011009910010010000100010071011611299190100001003003830038300383003830038
10204300372250000126119900251010010010000100100005001067451130018300373003728588328745101002001000020010000300373003711102011009910010010000100020071011611299190100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 04 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100243003722510611990025100101010000101000050106745130018300373003728610032876710010201000020100003003730037111002110910101000010200640416652991910000103003830038300383003830038
100243003722500611990025100101010000101000050106745130018300373003728610032876710010201000020100003003730037111002110910101000010000640616662991910000103003830038300383003830038
100243003722500611990025100101010000101000050106745130018300373003728610032876710010201000020100003003730037111002110910101000010000640416552991910000103003830038300383003830038
100243003722500611990025100101010000101000050106745130018300373003728610032876710010201000020100003003730037111002110910101000010100640616652991910000103003830038300383003830038
100243003722500611990025100101010000101000050106745130018300373003728610032876710010201000020100003003730037111002110910101000010000640616652991910000103003830038300383003830038
100243008522500611990025100101010000101000050106745130018300373003728610032876710010201000020100003003730037111002110910101000010000640416652991910000103003830038300383003830038
100243003722500611990025100101010000101000050106745130018300373003728610032876710010201000020100003003730037111002110910101000010000640516662991910000103003830038300383003830038
100243003722500611990025100101010000101000050106745130018300373003728610032876710010201000020100003003730037111002110910101000010000640616662991910000103003830038300383003830038
100243003722500611990025100101010000101000050106745130018300373003728610032876710010201000020100663003730037111002110910101000010000640516562991910000103003830038300383003830038
100243003722500611990025100101010000101000050106745130018300373003728610032876710010201000020100003003730037111002110910101000010000640516552991910000103003830038300383003830133

Test 3: Throughput

Count: 8

Code:

  frecpe s0, s8
  frecpe s1, s8
  frecpe s2, s8
  frecpe s3, s8
  frecpe s4, s8
  frecpe s5, s8
  frecpe s6, s8
  frecpe s7, s8
  movi v8.16b, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80204800405990000039258010010080000100800005006400000800208003980039699716699938010020080008200800088003980039118020110099100100800001000000011151170160080036800001008004080040800408004080040
8020480039599000013239258010010080000100800005006400000800208003980039699716699938010020080008200800088003980039118020110099100100800001000000011151170160080036800001008004080040800408004080040
80205800396000000039258010010080000100800005006400000800208003980039699716699938010020080008200800088003980039118020110099100100800001000000011151170160080036800001008004080040800408004080040
80204800396000000039258010010080000100800265006400000800208003980039699716699938010020080008200800088003980039118020110099100100800001000000011151170160080036800001008004080040800408004080040
80204800396000000039258010010080000100800005006400000800208003980039699716699938010020080008200800088003980039118020110099100100800001000000011151170160080036800001008004080040800408004080040
80204800395990009039258010010080000100800005006400000800208003980039699716699938010020080008200800088003980039118020110099100100800001000000011151170160080036800001008004080040800408004080040
802048003959900000961258010010080000100800005006400000800208003980039699716699938010020080008200800088003980039118020110099100100800001000000011151170160080036800001008004080040800408004080040
80204800396000000039258010010080000100800005006400000800208003980039699716699938010020080008200800088003980039118020110099100100800001000000011151170160080036800001008004080040800898004080040
80204800395990000039258010010080000100800005006400000800208003980039699716699938010020080008200800088003980039118020110099100100800001000000011151170160080036800001008004080040800408004080040
80204800395990000039258010010080000100800005006400000800208003980039699716699938010020080008200800088003980039118020110099100100800001000000011151170160080036800001008004080040800408004080040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80024800395990502580010108000010800005064000008002080039800396998637001980010208000020800008003980039118002110910108000010000005020116118003480000108004080040800408004080040
80024800395990502580010108000010800005064000018002080039800396998637001980010208000020800008003980039118002110910108000010000005020116118003480000108004080040800408004080040
80024800396000502580010108000010800265064000018002080039800396998637001980010208000020800418003980187118002110910108000010000105020116118003480000108004080040800408004080089
800248003959909242580010108000010800005064000018002080039800396998637001980010208000020800008003980039118002110910108000010001005020116118003480000108004080040800898004080040
80024800395990502580010108000010800005064000018002080039800396998637001980010208000020800008003980039118002110910108000010200005020116118003480000108004080040800408004080040
80024800395990502580010108000010800005064000018002080039800396998637001980010208000020800008003980039118002110910108000010000005020116118003480000108004080040800408004080040
80024800396000502580010108000010800005064000018002080039800396998637001980010208000020800008003980039118002110910108000010000005020116118003480000108004080040800408004080040
80024800395990502580010108000010800005064000018002080039800397001437001980010208000020800008003980039118002110910108000010000005074116118003480000108004080040800408004080040
80024800396000502580010108000010800005064000018002080039800396998637001980010208000020800008003980039118002110910108000010000005020116118003480000108004080040800408004080040
80024800395990502580010108000010800005064000008002080039800396998637001980010208000020800008003980039118002110910108000010000005020116118003480000108004080040800408004080040