Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SHA1C

Test 1: uops

Code:

  sha1c q0, s1, v2.4s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
100440383000822865251000100010001398950401940384038369033896100010003000403840381110011000073216223873100040394039403940394039
100440383009612865251000100010001398951401940384038369033896100010003000403840381110011000073216223873100040394039403940394039
1004403830012612865251000100010001398951401940384038369033896100010003000403840381110011000073216223873100040394039403940394039
1004403830001242865251000100010001398951401940384038369033896100010003000403840381110011000073216223873100040394039403940394039
100440383000612865251000100010001398951401940384038369033896100010003000403840381110011000073216223873100040394039403940394039
100440383000612865251000100010001398950401940384038369033896100010003000403840381110011000073216223873100040394039403940394039
100440383000612865251000100010001398951401940384038369033896100010003000403840381110011000073216223873100040394039403940394039
100440383000612865251000100010001398950401940384038369033896100010003000403840381110011000073216223873100040394039403940394039
100440383000612865251000100010001398951401940384038369033896100010003000403840381110011000073216223873100040394039403940394039
1004403830001242865251000100010001398950401940384038369033896100010003000403840381110011000073216223873100040394039403940394039

Test 2: Latency 1->1

Code:

  sha1c q0, s1, v2.4s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0038

retire (01) | cycle (02) | 03 | 07 | 0a | 0b | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102044003830011001642986525101001001000010010000500142689504001904003840038385477387411010020010008200300244003840038111020110099100100100001000011171903164439883100001004003940039400394003940039
102044003830011001642986525101001001000010010000500142689504001904003840038385477387411010020010008200300244003840038111020110099100100100001000011172004163339883100001004008740039400394003940039
102044003830011001642986525101001001000010010000500142689504001904003840038385403387461010020010000200300004003840038111020110099100100100001000000071216166639871100001004003940039400394003940039
102044003830011001642986525101001001000010010000500142689504001904003840038385403387461010020010000200300004003840038111020110099100100100001000000071216166639871100001004003940039400394003940039
102044003830011001642986525101001001000010010000500142689504001904003840038385403387461010020010000200300004003840038111020110099100100100001000000071216164639871100001004003940039400394003940039
102044003830011001642986525101001001000010010000500142689504001904003840038385403387461010020010000200300004003840038111020110099100100100001000000071214166539871100001004003940039400394003940039
102044003830011101642986525101001001000010010000511142689504001904003840038385403387461010020010000200300004003840038111020110099100100100001000000071216164639871100001004003940039400394003940039
102044003830011001642986525101001001000010010000500142689504001904003840038385403387461010020010000200300004003840038111020110099100100100001000000071216166639871100001004003940039400394003940039
102044003829911001642986525101001001000010010000500142689504001904003840038385403387461010020010000200300004003840038111020110099100100100001000000071216166639871100001004003940039400394003940039
102044003830011001642986525101001001000010010000500142689504001904003840038385403387461010020010000200300004003840038111020110099100100100001000000071216166639871100001004003940039400394003940039

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0038

retire (01) | cycle (02) | 03 | 18 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024400383000061298652510010101000010100005014268950400194003840038385623387681001020100002030000400384003811100211091010100001000640216223987210000104003940039400394003940039
10024400383000061298652510010101000010100005014268951400194003840038385623387681001020100002030000400384003811100211091010100001000640216323987210000104003940039400394003940039
100244003830002461298652510016101000010100005014268950400194003840087385813387681001020100002030000400384003811100211091010100001000640216223987210000104003940039400394003940039
10024400383000061298652510010101000010100005014268951400194003840038385623387681001020100002030000400384003811100211091010100001000640216223987210000104003940039400394003940039
10024400383000061298652510010101000010100005014268950400194003840038385623387681001020100002030000400384003811100211091010100001000640216223987210000104003940039400394003940039
10024400383000061298652510010101000010100005014268951400194003840038385623387681001020100002030000400384003811100211091010100001000640216223987210000104003940039400394003940039
10024400382990361298652510010101000010100005014268950400194003840038385623387681001020100002030000400384003811100211091010100001000640216223987210000104003940039400394003940039
10024400383000061298652510010101000010100005014268950400194003840038385623387681001020100002030000400384003811100211091010100001000640216223987210000104003940039400394003940039
100244003830000726298652510010101000010100005014268950400194003840038385623387681001020100002030000400384003811100211091010100001000640216223987210000104003940039400394003940039
10024400383000061298652510010101000010100005014268950400194003840038385623387681001020100002030000400384003811100211091010100001000640216223987210000104003940039400394003940039

Test 3: Latency 1->2

Code:

  sha1c q0, s0, v1.4s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 07 | 0b | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a8 | ac | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204500373750000006139829251010010010000100100005001786259150018500375003748491348745101002001000020030213500375003711102011009910010010000100000071021611498220100001005003850038500385003850038
10204500373750000006139829251010010010000100100005001786259150018500375003748491348745101002001000020030000500375003711102011009910010010000100010071011611498220100001005003850038500385003850038
10204500373740000009439829251010010010000100100005001786259150018500375003748491348745101002001000020030000500375003711102011009910010010000100000071011611498220100001005003850038500385003850038
10204500373740000006139829251010010010000100100005001786259150018500375003748491348745101002001000020030000500375003711102011009910010010000100000071011611498220100001005003850085500855003850038
102045003737500000072639829251010010010000100100005001786259150018500375003748491348745101002001000020030000500375003711102011009910010010000100000071011611498220100001005003850038500385003850038
10205500373750000006139829251010010010000100100005001786259150018500375003748491348745101002001000020030000500375003711102011009910010010000100003071011611498220100001005003850038500385003850038
10204500373740000006139829251010010010000100100005001786259150018500375003748491348745101002001000020030000500375003711102011009910010010000100000071221611498220100001005003850038500385003850038
10204500373751000006139829251010010010000100100005001786259050018500375003748491348745101562001000020030000500375003711102011009910010010000100000071011611498220100001005003850038500385003850038
10204500373750000006139829251010010010000132100005001786259150018500375003748491348745101002001000020030000500845003711102011009910010010000100010071011611498220100001005003850038500385003850038
10204500373750000006139829251010010010000100100005001786259150018500375003748491348745101002001000020030000500375003711102011009910010010000100400071011611498220100001005003850038500385003850038

1000 unrolls and 10 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024500373740061398292510010101000010100005017862590500185003750037485133487671001020100002030000500375003711100211091010100001000000640216224982410000105003850038500385003850038
10024500373750061398292510010101000010100005017862590500185003750037485133487671001020100002030000500375003711100211091010100001000000640216224982410000105003850038500385003850038
10024500373750061398292510010101000010100005017862591500185003750037485133487671001020100002030000500375003711100211091010100001000000640216224982410000105003850038500385003850038
10024500373750061398292510010101000010100005017862590500185003750037485133487671001020100002030000500375003711100211091010100001000000640216224982410000105003850038500385003850038
10024500373740061398292510010101000010100005017862590500185003750037485133487671001020100002030000500375003711100211091010100001000000640216224982410000105003850038500385003850038
10024500373740061398292510010101000010100005017862590500185003750037485133487671001020100002030000500375003711100211091010100001000000640216224982410000105003850038500385003850038
100245003737400726398292510010101000010100005017862590500185003750037485133487671001020100682030000500375003711100211091010100001000000640216224982410000105003850038500385003850038
10024500373750061398292510010101000010100005017862591500185003750037485133487671001020100002030000500375003711100211091010100001000000640216224982410000105003850038500385003850038
100245003737400726398292510010101000010100005017862591500185003750037485133487671001020100002030000500375003711100211091010100001000000640216224982410000105003850038500385003850038
10024500373740061398292510010101000010100005017862591500185003750037485133487671001020100002030000500375003711100211091010100001000000640216224982410000105007150038500385003850038

Test 4: Latency 1->3

Code:

  sha1c q0, s1, v0.4s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020450037375000613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000071011611498220100001005003850038500385003850038
1020450084375000613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000071011611498220100001005003850038500385003850038
1020450037374000613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000071011611498220100001005003850038500385003850038
1020450037375000613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110202100991001001000010000071011611498220100001005003850038500385003850038
1020450037375000613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000071011611498220100001005003850038500385003850038
1020450037374000613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000071011611498220100001005003850038500385003850038
1020450037375000613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000071011611498220100001005003850038500385003850038
1020450037375000613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000071011611498220100001005003850038500385003850038
1020450037374000613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000071011611498220100001005003850038500385003850038
1020550037375000613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000071011611498220100001005003850038500385003850038

1000 unrolls and 10 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100245003737400168398292510010101000010100005017862590500185003750037485133487671001020100002030000500375003711100211091010100001003000640416564982410000105003850038500385003850038
100245003737400581398292510010101000010100005017862590500185003750037485133487671001020100002030000500375003711100211091010100001000000640616654982410000105003850038500385003850038
10024500373750084398292510010101000010100005017862590500185003750037485133487671001020100602030000500375003711100211091010100001010000640616564982410000105003850038500385003850038
1002450037374006553982925100101010000101000050178625905001850037500374851311487671001020100002030000500375003711100211091010100001000000640616644982410000105003850038500385003850038
100245003737500509398292510010101000010100005017862590500185003750037485133487671001020100002030000500375003711100211091010100001000000640516664982410000105003850038500385003850038
100245003737500551398292510010101000010100005017862590500185003750037485133487671001020100002030000500375003711100211091010100001000000640616564982410000105003850038500385003850038
100245003737500739398292510010101000010100005017862590500185003750037485133487671001020100002030000500375003711100211091010100001000000640516664982410000105003850038500385003850038
100245003737500265398292510010101000010100005017862590500185003750037485133487671001020100002030000500375003711100211091010100001000000640516664982410000105003850038500385003850038
100245003737500546398292510010101000010100005017862590500185003750037485133487671001020100002030000500375003711100211091010100001000000640616464982410000105003850038500385003850038
100245003737400509398292510010101000010100005017862590500185003750037485133487671001020100002030000500845003711100211091010100001000000640616564982410000105003850038500385003850038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  sha1c q0, s8, v9.4s
  movi v1.16b, 0
  sha1c q1, s8, v9.4s
  movi v2.16b, 0
  sha1c q2, s8, v9.4s
  movi v3.16b, 0
  sha1c q3, s8, v9.4s
  movi v4.16b, 0
  sha1c q4, s8, v9.4s
  movi v5.16b, 0
  sha1c q5, s8, v9.4s
  movi v6.16b, 0
  sha1c q6, s8, v9.4s
  movi v7.16b, 0
  sha1c q7, s8, v9.4s
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 4.0005

retire (01) | cycle (02) | 03 | 04 | 07 | 08 | 0b | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a7 | a8 | a9 | ac | c2 | branch mispredict (cb) | cd | cf | d5 | d6 | da | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020432003823980300001032398652580100100800001008000050011436895132001903200383200382997903299996801002028000020024062432003832003811160201100991001001600001000004000001018091609431988600160000100320039320039320039320039320039
1602043200382397000021072623986510480100100800001008000050011436895132001903200383200382997903300103801002008000020024063032003832003811160201100991001001600001000004230001011071607831988600160000100320039320039320039320039320039
1602043200382397000000111423986510180100100800001008000050011436895132001903200383200382997903299996801002008000020024000032003832003811160201100991001001600001000001030001011041609931988600160000100320039320039320039320233320039
160204320038239710010017242398052580100100800001008000050011436895132001903200383202322997903299996801002008000020024000032003832003811160201100991001001600001002010056820001011084909931988600160000100320039320231320039320039320039
160204320231239700000072623986525801001038000010080000500114368951320019032003832003829983733001038010020080000200240000320038320038111602011009910010016000010000000000010110916016931988600160000100320039320039320137320185320039
160204320038239700004809782398652580100100800001008000050011436895132001903200383200382997903299996801002008000020024000032003832003811160201100991001001600001000001030001011081608432003400160000100320039320039320039320039320039
1602043200382397000010207262398652580100100800001008000050011436895132001903200383200382997903299996801002008000020024000032003832003811160201100991001001600001000000000001011091609931988600160000100320039320039320039320039320039
16020432003823980000007262398652580100100800001008000050011436895132001903200383200382997903299996801002008000020024000032003832003811160201100991001001600001000000000001011091609431988600160000100320039320039320039320039320039
160204320038239700003006123986525801001008000010080000500114368951320019032003832003829979032999968010020080000200240000320038320038111602011009910010016000010000000000010110101609931988600160000100320039320039320039320039320039
1602043200382397000000612398652580100100800001008000050011436895132001903200383200382997903299996801002008000020024000032003832023111160201100991001001600001000000000001011091609431988600160000100320039320039320039320232320039

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 4.0005

retire (01) | cycle (02) | 03 | 07 | 09 | 0a | 0b | 18 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024320038239701000006723986525800351080000108000050114379711153200190320038320038299857330009980171208021020240000320038320038511600211091010160000100000000000010022113181611124319887021191516000010320039320039320039320234320039
160024320038239900000005422398652580010108000010800005011436895115320019032003832003829983533000188001020800002024000032003832008621160021109101016000010000200300001002283181611162319887021191516000010320039320039320039320039320039
16002432003824250001000672398652580035108000010800005011436895115320019032003832003829981233000188001020800002024000032003832003811160021109101016000010000000000001002283141611144319887021191516000010320039320234320092320039320039
160024320038239700000407322398652580010108000010800005011436895115320019032003832003829981233000188017020800002024000032003832003811160021109101016000010000010000001002283141611142319887021191516000010320039320039320039320039320039
160024320038239700000096723986525800101080000108000050114368951153200190320038320038299812330001880010208000020240174320038320038111600211091010160000100000000000010022113141611262319887021193016000010320039320039320039320039320039
16002432003823970000000672398652580010108000010800005011436895115320019032003832003829981233000188001020800002024000032003832003811160021109101016000010000000000001002283121611142319887021191516000010320039320039320039320039320039
1600243200382397000000022382398652580010108000010800005011436895115320019332003832003829981233000188001020800002024000032003832003811160021109101016000010000000000011002283141611124319887021191516000010320039320039320039320233320039
1600243200382397000000024712398652580036108000010800005011436895115320019032003832003829981233000188001020800002024000032023232003811160021109101016000010000010300001002283161611145319887021191516000010320039320039320039320039320039
1600243200382398000000126723986525800101080024118000050114368951153200190322120322063300241194301259818932082573202461593223043219234911600211091010160000100200122120820001166183111737111811322598121201516000010322943323424323074323515325215
16002432496324346121636519833272398502580010108000010800005011436895015320019032003832003829981233000188001020800002024000032003832003811160021109101016000010000000000001002283141611142319887021191516000010320039320039320039320039320039

Test 6: throughput

Count: 16

Code:

  sha1c q0, s16, v17.4s
  sha1c q1, s16, v17.4s
  sha1c q2, s16, v17.4s
  sha1c q3, s16, v17.4s
  sha1c q4, s16, v17.4s
  sha1c q5, s16, v17.4s
  sha1c q6, s16, v17.4s
  sha1c q7, s16, v17.4s
  sha1c q8, s16, v17.4s
  sha1c q9, s16, v17.4s
  sha1c q10, s16, v17.4s
  sha1c q11, s16, v17.4s
  sha1c q12, s16, v17.4s
  sha1c q13, s16, v17.4s
  sha1c q14, s16, v17.4s
  sha1c q15, s16, v17.4s
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 4.0002

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204640038479400007474798652516010010016000010016000050022876895164001906400386400386197970661999216010020016000820048002464003864003811160201100991001001600001000000011110117116006398850160000100640081640039640039640039640039
1602046400384795000337264798652516010010016000010016000050022876895164001906400386400386197970661999216010020016000820048002464008664003811160201100991001001600001000000011110117016006398850160000100640039640039640039640039640039
1602046400384794000063744798652516010010016000010016000050022876895064001906400386400386197970661999216010020016000820048002464008564003811160201100991001001600001000003011110117016006398850160000100640039640039640039640039640039
160204640038479500079817794798652516010010016000010016000050022876895164001906400386400386197973661999216010020016000820048002464003864003811160201100991001001600001004400011110117016106398850160000100640039640039640039640039640039
16020464003847940000614798652516010010016000010016000050022876895064001906400386400386197970661999216010020016000820048002464008664003811160201100991001001600001000000011110117116006398850160000100640039640039640039640087640039
1602046400384794000071914798652516010010016000010016000050022876895064001906400386400386198130661999216010020016000820048002464003864003811160201100991001001600001000000011110117016006398850160000100640039640039640039640039640039
1602046400384794000039347986525160100100160000100160000500228768951640019064003864003861979706619992160100200160008200480024640038640038111602011009910010016000010000012411110117016006398850160000100640039640076640615640184640039
1602046400384794000022994798652516012710016000010016000050022876895164001906400386400386197970661999216010020016000820048002464003864003811160201100991001001600001000000011110117016006398850160000100640039640086640039640039640039
1602046400384794000071954798653716010010016000010016000050022876895164001906400386400386197970661999216010020016000820048002464003864007211160201100991001001600001000000011110117016006398850160000100640039640039640039640039640039
16020464003847940000614798652516010010016000010016000050022876895164001906400386400386197970661999216010020016000820048002464003864003811160201100991001001600001000000011110117016106398850160000100640039640039640039640039640039

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 4.0002

retire (01) | cycle (02) | 03 | 07 | 0a | 0b | 18 | 19 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600246400384795000000072479865251600101016000010160000502287689511640019064003864003861981203620018160010201600002048000064003864003811160021109101016000010000000600001002231121161111818639873016161316000010640087640039640039640039640085
16002464003847940000000732479865251600101016000010160040502287689511640019064003864003861981203620018160010201600002048000064003864003811160021109101016000010203000000001002232114161121717639873016161316000010640039640039640039640039640039
160024640038479400000002680479865251600101016000010160000502287689511640019064003864003861981203620018160010201600002048020464003864003811160021109101016000010000000000001002431117161111817639873016161316000010640039640039640039640039640039
16002464003847940000000732479865251600101016000010160000502287689511640019064003864003861981203620018160010201600002048000064003864003811160021109101016000010000000000001002231116161112118639873016161316000010640039640039640039640039640039
16002464003847940000000765479865251600101016000010160000502287689501640019064003864003861981203620018160010201600002048000064008564003811160021109101016000010000080040001002231116161111515639873016162616000010640039640039640039640039640039
1600246400384794000000067479865251600101016000010160000502287689511640053064003864003861981203620018160010201600002048000064003864003811160022109101016000010000010000101002431219161111917639873016161316000010640039640039640039640039640078
16002464003847940000090732479865251600101016000010160000502287689511640019064003864003861981233620018160010201600002048000064003864003811160021109101016000010400000000001015831123161211716639873016162616000010640075640039640039640039640039
160024640038479500000420753479865251600101016000010160000502287689511640019064003864003861981203620018160010201600002048000064003864003811160021109101016000010000000000001002231116161111724639873016161316000010640039640039640039640039640039
160024640038479400000390732479865251600101016000012160000502287689511640019064003864003861981203620018160010201600002048000064007964003811160021109101016000010000000000001002231120161111818639873016161316000010640039640039640039640039640039
160024640038479400000006995479865251600101016000010160000502287689511640019064003864003861981203620018160010201600002048000064003864003811160021109101016000010000000000001002231117161111617639873016161316000010640039640039640039640039640039