Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SHA256H2

Test 1: uops

Code:

  sha256h2 q0, q1, v2.4s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
100440383106128652510001000100013989514019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
100440383006128652510001000100013989504019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
1004403830019728652510001000100013989504019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
100440383006128652510001000100013989504019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
1004403830061286525100010001000139895040194038403836903389610001000300040384038111001100022673216223873100040394039403940394039
100440383106128652510001000100013989504019403840383690338961000100030004038403811100110005073216223873100040394039403940394039
100440383006128652510001000100013989504019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
1004403831061286525100010001000139895040194038403836903389610001000300040384038111001100013073216223873100040394039403940394039
1004403830061286525100010001000139895040194038403836903389610001000300040384038111001100016073216223873100040394039403940394039
100440383006128652510001000100013989504019403840383690338961000100030004038403811100110000073216223908100040394039403940394039

Test 2: Latency 1->1

Code:

  sha256h2 q0, q1, v2.4s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0038

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | a9 | ac | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102044003830000000124298652510100100100001001000050014268950400194003840038385403387461010020010000200300004003840038111020110099100100100001000000071003162239871100001004003940039400394003940039
10204400383000000061298652510100100100001001000050014268951400194003840038385403387461010020010000200300004003840038111020110099100100100001000000071002162239871100001004003940039400394003940039
10204400383000000061298652510100100100001001000050014268951400194003840038385403387461010020010000200300004003840038111020110099100100100001000000071002162239871100001004003940039400394003940039
102044003830000000233298652510100100100001001000050014268950400194003840038385403387461010020010000200300004003840038111020110099100100100001000000071002162239871100001004003940039400394003940039
102044003830000000533298652510100100100001001000050014268951400194003840038385403387461010020010000200300004003840038111020110099100100100001000000071012162239871100001004003940039400394003940039
10204400383000000061298652510100100100001001000050014268950400194003840038385403387461010020010000200300004003840038111020110099100100100001000000071012162239871100001004003940039400394003940039
102044003830000054061298652510100100100001001000050014268951400194003840038385403387461010020010000200300004003840038111020110099100100100001000000071012163239871100001004003940039400394003940039
10204400383000000061298652510100100100001001000050014268951400194003840038385403387461010020010000200300004003840038111020110099100100100001000000071013162339871100001004003940039400394003940039
10204400383000000061298652510100100100001001000050014268950400194003840038385403387461010020010000200300004003840038111020110099100100100001000000071012162339871100001004003940039400394003940039
10204400383000000061298652510100100100001001000050014268951400194003840038385403387461010020010000200300004003840038111020110099100100100001000000071012162239871100001004003940039400394003940039

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0038

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100244003830000000229298652510010101000010100005014268951400194003840038385623387681001020100002030000400384003811100211091010100001000640416333987210000104003940039400394003940039
100244003830000000251298652510010101000010100005014268951400194003840038385623387681001020100002030000400384003811100211091010100001000640316333987210000104003940039400394003940039
10024400383011132061298652510010101000010100005014268951400194003840038385623387681001020100002030000400384003811100211091010100001000640316433987210000104003940039400394003940039
10024400383000000061298652510010101000010100005014268950400194003840038385623387681001020100002030000400384003811100211091010100001000640316433987210000104003940039400394003940039
10024400382990000061298652510010101000010100005014268950400194003840038385623387681001020100002030000400384003821100211091010100001000640316333987210000104003940039400394003940039
100244003830000000726298652510010101000010100005014268951400194003840038385623387681001020100002030168400384003811100211091010100001000640316333987210000104003940039400394003940039
10024400383000000061298652510010101000010100005014268951400194003840038385623387681001020100002030000400384003811100211091010100001000640316333987210000104003940039400394003940039
10024400383000000061298652510010101000010100005014268951400194003840038385623387681001020100002030000400384003811100211091010100001010640316333987210000104003940039400394003940039
10024400383000000061298652510010101000010100005014268951400194003840038385623387681001020100002030000400384003811100211091010100001000640316433987210000104003940039400394003940039
10024400383000000061298652510010101000010100005014268951400194003840038385623387681001020100002030000400384003811100211091010100001000640316333987210000104003940039400394003940039

Test 3: Latency 1->2

Code:

  sha256h2 q0, q0, v1.4s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 0f | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | ac | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020450037375000200613982925101001001000010010000500178625915001850037500374849103487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385003850038
1020450037375000000613982925101001001000010010000500178625915001850037500374851303487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385003850038
1020450037374000000613982925101001001000010010000500178625915001850037500374849103487451010020010000200300005003750037111020110099100100100001000000072711611498220100001005003850038500385003850038
1020450037375000000613982925101001001000010010000500178625905001850037500374849103487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385003850038
1020450037375000000613982925101001001000010010000500178625915001850037500374849103487451010020010000200300005003750037111020110099100100100001000001071011611498220100001005003850038500385003850038
1020450037374000000613982925101001001000010010000500178625915001850037500374849103487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385003850038
1020450037375000000613982925101001001000010010000500178625915001850037500374849103487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385003850038
10204500373750000007263982925101001001000010010000500178625915001850037500374849103487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385003850038
1020450037375000000613982925101001001000010010000500178625915001850037500374849103487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385003850038
1020450037375000000613982925101001001000010010000500178625915001850037500374849103487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385003850038

1000 unrolls and 10 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | ac | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100245003737500061398292510010101000010100005017862595001850082500374851303487671001020100002030000500375003711100211091010100001000000640316734982410000105003850038500385003850038
100245003737412061398292510010101000010100005017862595001850037500374851303487671001020100002030000500375003711100211091010100001020000640316434982410000105003850038500385003850038
100245003737500061398292510010101000010100005017862595001850037500374851303487671001020100002030000500375003711100211091010100001000000640316434982410000105003850038500385003850038
1002450037375000103398292510010121001010100007117862595008850037500844851303487671001020100002030000500845008511100211091010100001000030640316534982410000105003850038500385003850038
1002450037374001261398292510010101000010100005017862595001850037500374851303487671001020100002030000500375003711100211091010100001000000640316634982410000105003850038500385003850038
100245003737500061398292510026101000010100006117862595001850037500374857803487671001020100002030000500375003711100211091010100001000000640316344982410000105003850038500385003850038
100245003737400061398292510010101000010100005017862595001850037500374851303487671001020100002030000500375003711100211091010100001000000640316334982410000105003850038500385003850038
1002450037375000536398292510010101000010100005017862595001850037500374851333487671001020100002030000500375003711100211091010100001000000640316534982410000105003850038500385003850038
1002450037375000107398292510010101000010100005017862595001850037500374851303487671001020100002030000500375003711100211091010100001000000640316334982410000105003850038500385003850038
100245003737500061398292510010101000010100005017862595001850037500374851303487671001020100002030000500375003711100211091010100001000000640316434982410000105003850038500385003850038

Test 4: Latency 1->3

Code:

  sha256h2 q0, q1, v0.4s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 09 | 0b | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020450037375000061398292510100100100001001000050017862591500185003750037484913487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385003850038
1020450037375000061398292510100100100001001000050017862590500185003750037484913487451010020010000200300005003750037211020110099100100100001000000071011611498220100001005003850086500385003850038
1020450037375000061398292510100100100001001000050017862591500185003750037484913487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385003850038
1020450037374000061398292510100100100001001000050017862591500185003750037484913487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385003850038
1020450037374000061398292510100100100001001000050017862591500185003750037484913487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385003850038
1020450084374000061398292510100104100001001000050017862590500185003750037484913487771010020010000200300005003750037111020110099100100100001000004371011610498220100001005003850038500385003850038
10204500373750027688460398292510100100100001001000050017862591500185003750037484913487451010020010000200300005003750037111020110099100100100001000010071011611498220100001005003850038500385003850038
10204500373740000103398292510100100100001001000050017862590500185003750037484913487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385008550038
102045003737511120580398292510100100100001001000050017862590500185003750037484913487451010020010000200300005003750037111020110099100100100001000000071011611498220100001005003850038500385003850038
1020450037375000061398292510100100100001001000050017862590500185003750037484913487451010020010000200300005003750037111020110099100100100001000000371011611498227100001005003850038500385003850038

1000 unrolls and 10 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002450037375006139829251001010100001010000501786259050018500375003748513348767100102010000203000050037500371110021109101010000100006402162249824010000105003850038500385003850038
1002450037374006139829251001010100001010000501786259050018500375003748513348767100102010000203000050037500371110021109101010000100006402162249824010000105003850038500385003850038
1002450037375006139829251001010100001010000501786259050018500375003748513348767100102010000203000050037500371110021109101010000100006402162249824010000105003850038500385003850038
1002450037375006139829251001010100001010000501786399050018500375003748513348767100102010000203000050037500371110021109101010000100306402162249824010000105003850038500385003850038
1002450037375006139829251001010100001010000501786259150018500375003748513348767100102010000203000050037500371110021109101010000100006402162249824010000105003850038500385003850038
1002450037375006139829251001010100001010000501786259050018500375003748513348767100102010000203000050037500371110021109101010000100006402162249824010000105003850038500385003850038
1002450037374006139829251001010100001010000501786259050018500375003748513348767100102010000203000050037500371110021109101010000100006402162249824010000105003850038500385003850038
1002450037374096139829251001010100001010000501786259050018500375003748513348767100102010000203000050037500371110021109101010000100006402162249824010000105003850038500385003850038
1002450037375006139829251001010100001010000501786259050018500375003748513348767100102010000203000050037500371110021109101010000100006402162249824010000105003850038500385003850038
1002450037375006139829251001010100001010000501786259050018500375003748513348767100102010000203000050037500371110021109101010000100006402162249824010000105003850038500385003850038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  sha256h2 q0, q8, v9.4s
  movi v1.16b, 0
  sha256h2 q1, q8, v9.4s
  movi v2.16b, 0
  sha256h2 q2, q8, v9.4s
  movi v3.16b, 0
  sha256h2 q3, q8, v9.4s
  movi v4.16b, 0
  sha256h2 q4, q8, v9.4s
  movi v5.16b, 0
  sha256h2 q5, q8, v9.4s
  movi v6.16b, 0
  sha256h2 q6, q8, v9.4s
  movi v7.16b, 0
  sha256h2 q7, q8, v9.4s
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0005

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | ac | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204160039119900000061799372580100100800001008000050056779521600200160039160039139887313999780100200800002002400001600391600391116020110099100100160000100000001011031611159980160000100160040160040160040160040160040
160204160039119900000061799372580100100800001008000050056779521600200160039160039139887313999780100200800002002406751600391600391116020110099100100160000100000001011011611159980160000100160040160040160040160040160040
160204160039119900000061799372580100100800001008004550056779521600200160039160039139887313999780100200800002002400001600391600391116020110099100100160000100000001011021611159980160000100160040160040160040160040160040
1602041600391199000000726799372580100100800001008000050056779521600200160039160100139887313999780100200800002002400001600391600391116020110099100100160000100000001011021611159980160000100160040160040160040160040160040
160204160039119900000061799372580100100800001008000050056779521600200160039160039139887313999780100200800002002400001600391600391116020110099100100160000100000001011021611159980160000100160040160040160040160040160040
1602041600391199000130319261799372580100100800001008000050056779521600580160039160039139887313999780100200800002002408461602891600391116020110099100100160000100200001011021611159980160000100160040160040160147160040160040
1602041600871199011012061799372580100100800001008000050056779521600200160039160039139887313999780100200800002002406781600391600391116020110099100100160000100000001011031611159980160000100160040160040160040160040160040
1602041600391199000000726799372580100100800001008000050056779521600200160039160039139887313999780100200800002002400001600391600391116020110099100100160000100000001011021611159980160000100160040160040160040160040160040
16020416003911990000001280799372580100100800001008000050056779521600203160039160039139887313999780100200800002002400001600391600391116020110099100100160000100000001011021611159980160000100160040160095160040160040160040
160204160039119800003061799372580100100800001008000050056779521600200160039160039139887313999780100200800002002400001600391600391116020110099100100160000100003001011021611159980160000100160040160040160040160040160040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0005

retire (01) | cycle (02) | 03 | 09 | 0a | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a7 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024160039119800000900677993725800101080000108000050567795221016002016003916003913990931400198001020800002024000016003916003911160021109101016000010000000000010022622141621171015998202017916000010160040160040160040160040160040
160024160039119800000000677993725800101080000108000050567795211016002016003916003913990931400198001020800002024000016003916003911160021109101016000010000000000010022311161621171115998202017916000010160040160040160040160040160040
16002416003911990004000067799371178001010800131180180505678027115160020160039160039139909171401028005520801132024051016013716009021160021109101016000010000000000010022351151621161015998202017916000010160040160040160040160040160040
16002416014011991013200077479937258011710800001380225505677952115160020160039160189139909341400748001020800562024000016008916003911160021109101016000010000000000010022841181621112815998202017916000010160040160040160040160040160040
160024160039119900000000677993725800101080000118000050567795221516002016003916003913990931400198001020800002024000016003916003911160021109101016000010000000000010022841151621191015998202017916000010160040160040160040160040160040
160024160039119800000840073279937258001010800001080000505677952210160020160039160039139909314001980010208000020240000160039160039111600211091010160000100000000000100223411216211101915998202017916000010160040160040160040160040160040
16002416003911980000000067799202580010108000010800005056779521001600201600391600391399093140019800102080000202400001600391600391116002110910101600001000000000011002234181621161415998202017916000010160040160040160040160040160040
16002416003911980000000073279937258001010800131080000505677952215160020160039160039139909714001980010208000020240000160039160039111600211091010160000100000000000100228416162116815998202017916000010160040160040160040160040160040
1600241600391199000001200677993725800101080000108000050567795210016002016003916003913990931400198001020800002024000016003916003911160021109101016000010000000000010022315151621181015998202017916000010160040160040160040160040160040
1600241600391199000000007327993725800101080000108000050567795221016002016003916003913990931400198001020800002024000016003916003911160021109101016000010000000000010022341131621115715998202017916000010160040160040160040160040160040

Test 6: throughput

Count: 16

Code:

  sha256h2 q0, q16, v17.4s
  sha256h2 q1, q16, v17.4s
  sha256h2 q2, q16, v17.4s
  sha256h2 q3, q16, v17.4s
  sha256h2 q4, q16, v17.4s
  sha256h2 q5, q16, v17.4s
  sha256h2 q6, q16, v17.4s
  sha256h2 q7, q16, v17.4s
  sha256h2 q8, q16, v17.4s
  sha256h2 q9, q16, v17.4s
  sha256h2 q10, q16, v17.4s
  sha256h2 q11, q16, v17.4s
  sha256h2 q12, q16, v17.4s
  sha256h2 q13, q16, v17.4s
  sha256h2 q14, q16, v17.4s
  sha256h2 q15, q16, v17.4s
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0002

retire (01) | cycle (02) | 03 | 08 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | a9 | ac | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204320039239810611599372516010010016000010016000050011357952032002003200393200392998873299997160100200160000200480216320039320039111602011009910010016000010000000010110316223199680160000100320040320040320040320040320040
160204320039239700611599162516010010016000010016000050011357952032002003200393200392998873299997160100200160000200480000320039320039111602011009910010016000010000000010110216223199680160000100320040320085320040320040320040
1602043200392397007261599372516010010016000010016004758511357952032002003200393200392999143299997160100200160000200480000320039320039111602011009910010016000010000000010110216223199680160000100320040320040320040320040320040
160204320039239800611599372516010010016000010016000050011357952032002003200393200392998873299997160100200160000200480000320039320039111602011009910010016000010000000010110116223200400160000100320040320040320040320040320040
1602043200392397007261599372516010010016000010016000050011357952032002003200393200392998873299997160100200160000200480000320039320039111602011009910010016000010000000010110216223199680160000100320040320040320040320040320040
16020432003923970687726159937251601001001600001001600005001135818603207050320039320039299887329999716010020016000020048000032003932003911160201100991001001600001000303300010110216223199680160000100320040320040320040320040320040
160204320039239700611599372516010010016000010016000050011357952032002033200393200392998873299997160100200160000200480000320039320039111602011009910010016000010000000010110216223199680160000100320040320040320040320040320040
1602043200392397006115993725160100100160000100160000500113579520320020032003932003929988732999971601002001600002004800003200393200391116020110099100100160000100000360010110216223199680160000100320040320040320040320040320040
1602043200392397006311599372516010010016000010316000050011357952032002003200393200392998873300034160100200160000200480000320039320039111602011009910010016000010000000010110216223199680160000100320040320040320040320040320040
16020432003923970061159937251601001001600001091601805001135795203200200320039320039299887329999716010020016000020048000032003932003911160201100991001001600001000003540010110216223199680160000100320040320040320040320040320090

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0002

retire (01) | cycle (02) | 03 | 07 | 08 | 0b | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | ac | c2 | c5 | cd | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002432003923970000007321599372516001010160000101600005011357952115320020032003932003929990903300019160010201600002048000032003932009011160021109101016000010000000001002282123161111663199681615816000010320089320040320040320040320040
16002432003923970000901311599372516001010160000101600475011357952015320020032003932003929990903300019160192201600002048000032003932003911160021109101016000010000030001002483116161111663199681615816000010320040320040320040320040320040
16002432003923970000002561599372516001010160000101600005011357952115320020032003932003929990903300019160010201600002048000032003932003911160021109101016000010000000001002284116161111663199681615816000010320040320090320040320040320040
16002432003923970000602571599372516001010160000101600005011357952115320020032003932003929990903300019160010201600002048000032003932003911160021109101016000010000000001002285116161117163199681615816000010320040320040320040320040320040
16002432003923970000001551599372516001010160000101600005011358111115320020032003932003929990903300019160010201600002048000032003932003911160021109101016000010000000001002285161611111163199683215816000010320040320040320040320040320040
16002432003923970000007321599372516001010160000101600005011357952115320020032003932003929990903300019160010201600002048000032003932003911160021109101016000010000000001002285116161116163199681615816000010320040320040320040320040320040
16002432003923970000006715993725160024101600651016000050113585611153202480320039320285299929016300019160010201600002048000032003932003911160021109101016000010000046300001002485113161111663199681615816000010320040320040320040320040320391
16002432078424000000007321599372516001010160000101600005011357952115320020032003932003929990903300019160056201600002048000032003932003911160021109101016000010000000001002285161611011363199681615816000010320040320040320040320040320040
160024320039239700006067159937251600101016000010160000501135795211532002003200393200392999090253000191600102016000020480000320039320039111600211091010160000100000000010022851161611116163199681615816000010320040320040320040320040320040
160024320039239800000067159918251600101016000010160000601135795201532002003200393200392999090330001916001020160000204800003200393200391116002110910101600001000000000100228515161117173199683215816000010320040320040320040320040320040