Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SHA1P

Test 1: uops

Code:

  sha1p q0, s1, v2.4s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 08 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a0 | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
1004403830001032865251000100010001398951401940384038369033896100010003000403840381110011000000073216223873100040394039403940394039
100440383000612865251000100010001398951401940384038369033896100010003000403840871110011000000073216223873100040394039403940394039
100440383100612865251000100010001398951401940384038369033896100010003000403840381110011000000073216223873100040394039403940394039
100440383000612865251000100010001398951401940384038369033896100010003000403840381110011000000073216223873100040394039403940394039
100440383000612865251000100010001398951401940384038369033896100010003000403840381110011000000073216223873100040394039403940394039
100440383000612865251000100010001401641401940384038369033896100010003000403840381110011000000073216223873100040394039403940394039
100440383000612865251000100010001398951401940384038369033896100010003000403840381110011000000073216223873100040394039403940394039
100440383100612865251000100010001398951401940384038369033896100010003000403840381110011000000073216223873100040394039403940394039
100440383000612865251000100010001398951401940384038369033896100010003000403840381110011000000073216223873100040394039403940394039
1004403831045612865251000100010001398951401940384038369033896100010003000403840381110011000000073216223873100040394039403940394039

Test 2: Latency 1->1

Code:

  sha1p q0, s1, v2.4s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0038

retire (01) | cycle (02) | 03 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | c5 | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204400383000029829865251010010110000100100005001426895400194003840038385403387461010020010000200300004003840038111020110099100100100001000071004162239871100001004003940039400394003940039
10204400382990019329865251010010010000100100005001426895400194003840038385403387461010020010000200300004003840038111020110099100100100001000071002162239871100001004003940039400394003940039
10204400382990019129865251010010010000100100005001426895400194003840038385403387461010020010000200300004003840038111020110099100100100001000071012162239871100001004003940039400394003940039
10204400383000059929865251010010010000100100005001426895400194003840038385403387461010020010000200300004003840038111020110099100100100001000071012162239871100001004003940039400394003940039
10204400383000019129865251010010010000100100005001426895400194003840038385403387461010020010000200300004003840038111020110099100100100001000071012162239871100001004003940039400394003940039
10204400382990025429865251010010010000100100005001426895400194003840038385403387461010020010000200300004003840038111020110099100100100001000071012162239871100001004003940039400394003940039
10204400383000023529865251010010010000100100005001426895400194003840038385403387461010020010000200300004003840038111020110099100100100001000071012162239871100001004003940039400394003940039
10204400383000023329865251010010010000100100005001426895400194003840038385403387461010020010000200300004003840038111020110099100100100001000071013162239871100001004003940039400394003940039
10204400383000021229865251010010010000100100005001426895400194003840038385403387461010020010000200300004003840038111020110099100100100001000071012162239871100001004008840137400394003940039
10204400382990092129865251010010010000100100005001426895400194003840038385403387461010020010000200300004003840038111020110099100100100001000071012162239871100001004003940039400394003940039

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0038

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100244003830005122986525100101010000101000050142689504001940038400383856233876810010201000020300004008640038111002110910101000010000640216223991010000104003940039400394003940039
100244003830001452986525100101010000101000050142689504001940038400383856233876810010201000020300004003840038111002110910101000010000640216223987210000104003940039400394003940039
100244003830002332986525100101010000101000050142689504001940038400383856233876810010201000020300004003840038111002110910101000010000640216223987210000104003940039400394003940039
1002440038300181242986525100101010000101000050142689504001940038400383856233876810010201000020300004003840038111002110910101000010000640216223987210000104003940039400394003940039
100244003830002352986525100101010000101000050142689504001940038400383856233876810010201000020300004003840038111002110910101000010000640216223987210000104003940039400394003940039
10024400383000612986525100101010000101000050142689504001940038400383856233876810010201000020300004003840038111002110910101000010000640216223987210000104003940039400394003940039
100244003830002512986525100101010000101000050142689504001940038400383856233876810053201000020300004003840038111002110910101000010000640316223987210000104003940039400394003940039
10024400383000612986525100101010000101000050142689514001940038400383856233876810010201000020300004003840038111002110910101000010000640216223987210000104003940039400394003940039
10024400383000612986525100101010000101000050142689504001940038400383856233876810010201000020300004003840038111002110910101000010000640216233987210000104003940039400394003940039
10024400383000612986525100101010000101000050142689504001940038400383856233876810010201000020300004003840038111002110910101000010000640216223987210000104003940039400394003940039

Test 3: Latency 1->2

Code:

  sha1p q0, s0, v1.4s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204500373750006139829251010010010000100100005001786259150018500375003748491348745101002001000020030000500375003711102011009910010010000100000071011611498240100001005003850038500385003850038
10204500373750006139829251010010010000100100435001786259150018500375003748491348745101002001018620030000500375003711102011009910010010000100000371011611498220100001005003850038500385008550038
10204500373750006139829251010010010000100100005001786259050018500375003748491348745101002001000020030000500375003711102011009910010010000100000071431723498240100001005003850038500385003850038
10204500373750006139829251010010010000100100005001786259150018500375003748491348745101002001000020030000500375003711102011009910010010000100000071011611498220100001005003850038500385003850038
10204500373750006139829251010010010000100100005001786259050018500375003748491348745101002001000020030000500375003711102011009910010010000100000071411623498240100001005003850038500385003850038
10204500373750016139829251010010010000100100005001786259150018500375003748491348745101002001000020030000500375003711102011009910010010000100000071011711498220100001005003850038500385003850038
10204500373750006139829251010010010000100100005001786259050018500375003748491348745101002001005420030000500375003711102011009910010010000100000071011611498220100001005003850038500385003850038
102045003737500061398292510100100100001001000050017862591500185003750037484913487451010020010000200300005003750037111020110099100100100001000000712217124982425100001005003850038500385003850038
102045003737500025139829251010010010000100100005001786259050018500375003748491348745101002001000020030000500375003711102011009910010010000100000071011611498220100001005003850038500385003850038
10204500373750006139829251012512510000100100006261786259150018500375003748491348745101002001000020030000500375003711102011009910010010000100000071411611498220100001005003850038500385003850038

1000 unrolls and 10 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024500373750103398292510010101000010100005017862595001850037500374851334876710010201000020300005003750037111002110910101000010006404163449824010000105003850038500385003850038
10024500373750102398292510010101000010100005017862595001850037500374851334876710010201000020300005003750037111002110910101000010006403163349824010000105003850038500385003850038
10024500373750441398292510010101000010100005017862595001850037500374851334876710010201000020300005003750037111002110910101000010006403163349824010000105003850038500385003850038
1002450037375061398292510010101000010100005017862595001850037500374851334876710010201000020300005003750037111002110910101000010106403163349824010000105003850038500385003850038
1002450037375061398292510010101000010100005017862595001850037500374851334876710010201000020300005003750037111002110910101000010006403163349824010000105003850038500385003850038
1002450037375061398292510010101000010100005017862595001850037500374851334876710010201000020300005003750037111002110910101000010006403163349824010000105003850038500385003850038
1002450037375061398292510010101000013100005017865245001850037500374851334876710050201000020300005003750037111002110910101000010106403163149824010000105003850038500385003850038
1002450037375061398292510010101000010100005017862595001850084500374851334876710010201000020300005003750037111002110910101000010036403163349824110000105003850038500385003850038
1002450037374961398292510010101000010100005017862595001850037500374851334876710010201000020300005007050037111002110910101000010006403163349824010000105003850038500385003850038
1002450037375061398292510010101000010100005017862595001850037500374851334876710010201000020300005003750037111002110910101000010006403163349824010000105003850038500385003850038

Test 4: Latency 1->3

Code:

  sha1p q0, s1, v0.4s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 09 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020450037375000061398292510100100100001001000050017862591500180500375003748491348745101002001000020030000500375003711102011009910010010000100000000071021611498220100001005003850038500385003850038
1020450037374000061398292510100100100001001000050017862591500180500375003748491348745101002001000020030000500375003711102011009910010010000100000000071011611498220100001005003850038500385003850038
1020450037375000061398142510100100100101001000059117862591500530500845008548491748777101002001000020030000500375003711102011009910010010000100000100071011611498220100001005003850038500385003850038
10204500373740000103398292510100100100001001000050017862591500180500375003748491348745101002001000020030000500375003711102011009910010010000100000000071011611498220100001005003850038500385003850038
1020450037374020108550398142510100110100051001003950017862591500180500845003748491348745101002001000020030000500375003711102011009910010010000100000130071011611498220100001005003850038500385003850038
10204500373750000103398292510100100100001001000050017862591500180500375003748491348745101002001000020030000500375003711102011009910010010000100000132071011611498220100001005003850038500385003850038
10204500373750045082398292510100100100001001000050017862591500180500375003748491348745101002001000020030000500375003711102011009910010010000100000000071011611498220100001005003850038500385003850038
1020450037375000061398292510100100100001001000050017862591500650500375003748491348745101002001000020030000500375003711102011009910010010000100000000071011611498220100001005003850038500385003850038
1020450037374000061398152510100100100001001000050017862591500180500375003748491348745101002001000020030000500375003711102011009910010010000100000000071011611498220100001005003850038500385003850038
10204500373750000726398292510100100100001001000050017862591500180500375003748491348745101002001000020030000500375003711102011009910010010000100000000071011611498220100001005003850038500385003850038

1000 unrolls and 10 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100245008437500065406139829251001010100001010000501786259050018500375003748513034876710010201000020300005003750037111002110910101000010000064021622498240010000105003850038500385003850038
1002450037376000006139829251001010100001010000501786259150018500375003748513034876710010201000020300005003750037111002110910101000010000064021622498240010000105003850038500385003850038
1002450037375000006139829251001010100001010000501786259150018500375003748513034876710010201000020300005003750037111002110910101000010000064021622498240010000105003850038500385003850038
1002450037374000006139829251001010100001010000501786259150018500375003748513034876710010201000020300005003750037111002110910101000010000064021622498240010000105003850038500385003850038
10024500373740000010739829251001010100001010000501786259150018500375003748513034876710010201000020300005003750037111002110910101000010000064021622498240010000105003850038500385003850038
10024500373740000099339829251001710100051010000601786524150018500375003748513034876710010201000020300005003750037111002110910101000010221733064021622498240010000105003850038500385003850038
1002450037375000606139829251001010100001010000501786259150018500375003748513034876710010201000020300005003750037111002110910101000010000064021622498240010000105003850038500385008550038
10024500373761201206139829251002110100001010000501786259150018500375003748513034876710010201000020300005003750037111002110910101000010003064021622498240010000105003850038500385003850038
10024500373750000010339829251001010100001010000501786259150018500375003748513034876710010201000020300005003750037111002110910101000010000064021622499630010000105003850038500385003850228
1002450037375000006139829251001010100001010000501786259050053500375003748513034876710010201000020300005003750037111002110910101000010000064021622498240010000105003850038500385003850038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  sha1p q0, s8, v9.4s
  movi v1.16b, 0
  sha1p q1, s8, v9.4s
  movi v2.16b, 0
  sha1p q2, s8, v9.4s
  movi v3.16b, 0
  sha1p q3, s8, v9.4s
  movi v4.16b, 0
  sha1p q4, s8, v9.4s
  movi v5.16b, 0
  sha1p q5, s8, v9.4s
  movi v6.16b, 0
  sha1p q6, s8, v9.4s
  movi v7.16b, 0
  sha1p q7, s8, v9.4s
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 4.0005

retire (01) | cycle (02) | 03 | 07 | 08 | 0a | 0b | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020432003823970000214207262398652580100100800001008000050011436895132001903200383200382997903299996801002008000020024000032003832003811160201100991001001600001000000000010110116113198860160000100320039320039320039320039320039
1602043200382397000000007262398202580100100800001008000050011436895132001903200383200382997903299996801002008000020024000032003832003811160201100991001001600001000000000010110116113198860160000100320039320039320039320039320280
160204320038239801000000258223986525880100100800001038000050011436895132001903200383200382997903299996801002008000020024000032003832003811160201100991001001600001000000000010110116113198860160000100320039320039320039320039320039
1602043200382398000000001152398652580100100800061008000050011436895132001903200383200382997903299996801002008000020024000032003832003811160201100991001001600001000000000010110116113198860160000100320039320039320039320039320039
160204320038239800000036069832398652580100100800001008000050011436895132001903200383200382997903299996801002008000020024000032003832003811160201100991001001600001000000000010110116113199640160000100320039320039320039320039320039
1602043200382398000000003442398652580100100800001008000050011436895132001903200383200382997903299996801002008000020024000032003832003811160201100991001001600001000000000010110116113198860160000100320039320039320039320039320039
1602043200382397000000007262398652580100100800001008000050011436895132001903200383200382997903299996801002008000020024000032003832003811160201100991001001600001000000000010110116113198860160000100320039320039320039320039320039
1602043200382397000000006123986525801001008000010080000500114368951320019032003832003829979029299996801002008000020024000032003832003811160201100991001001600001000000000010110117113198860160000100320039320039320039320039320039
1602043200382397000000007262398652580100100800001008000050011436895132001903200383200382997903299996801002008000020024000032003832003811160201100991001001600001000000000010110216113199590160000100320097320039320039320039320039
16020432003823970000007590612398652580100100800001008000050011436895132001903200383200382997903299996801002008000020024000032003832003811160202100991001001600001000000000010110116113198860160000100320039320039320039320039320039

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 4.0005

retire (01) | cycle (02) | 03 | 04 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002432003823970000000067239865548001010800001080000501143689511532001932003832003829981233000188001020800002024000032003832003811160021109101016000010000000010022821121612202413319887016141316000010320039320039320039320039320039
1600243200382397000000009622398652580010108000010800005011437166015320019320038320038299812330001880010208000020240000320038320038111600211091010160000100000000100241132251622182126319887032142616000010320039320039320039320039320039
1600243200382398000000001002398652580010108000010800005011436895015320019320038320038299812330001880010208000020240000320038320038111600211091010160000100000000100241132251622152629319887032282616000010320039320039320039320039320039
160024320038239800000000738239865258001010800001080000501143689501532001932003832003829981233000188001020800002024000032003832003811160021109101016000010003000010022821122411141227319922032282616000010320039320039320039320039320039
160024320038239700000570067239865258001010800001080000501143689501532001932003832003829981233000188001020800002024000032003832003811160021109101016000010000000010022822131611152511319887032281316000010320039320039320039320039320039
160024320038239700000000765239865258001010800001080000501143689511532001932003832003829981233000188001020800002024000032003832003811160021109101016000010000000010022825131612151127319887016141316000010320039320039320039320039320039
160024320038239800000000962398502580010108000010800005011436895015320019320038320038299812283000188001020800002024000032003832003811160021109101016000010000000010024832101622182528319887032282616000010320039320039320039320232320039
16002432003823970000000017512398659980010108000010800005011436895015320019320038320038299812330001880010208000020240000320038320038111600211091010160000100000000100241142251622162513319887032282616000010320039320039320039320039320088
160024320038239700000300738239865258001010800001080000501143689501532001932003832003829981233000188001020800002024000032003832003811160021109101016000010200000010096842251622152613319887016282616000010320039320039320039320235320039
16002432003823970010412352014062398652580010108000010801205011436895015320019320038320038299812183000188001020800002024000032003832003811160021109101016000010004004010093832124922161927319887032282616000010320039320039320039320039320039

Test 6: throughput

Count: 16

Code:

  sha1p q0, s16, v17.4s
  sha1p q1, s16, v17.4s
  sha1p q2, s16, v17.4s
  sha1p q3, s16, v17.4s
  sha1p q4, s16, v17.4s
  sha1p q5, s16, v17.4s
  sha1p q6, s16, v17.4s
  sha1p q7, s16, v17.4s
  sha1p q8, s16, v17.4s
  sha1p q9, s16, v17.4s
  sha1p q10, s16, v17.4s
  sha1p q11, s16, v17.4s
  sha1p q12, s16, v17.4s
  sha1p q13, s16, v17.4s
  sha1p q14, s16, v17.4s
  sha1p q15, s16, v17.4s
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 4.0002

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204640075479400000066300536479865251601001001600001001600005002287689506400673640038640038619790036199961601482001600002004800006400386400381116020110099100100160000100000020010110117116398730160000100640039640087640039640076640039
1602046400384795000000000726479865251601001001600001001600005002287689516400190640038640038619790036199961601002001600002004802166400386400381116020110099100100160000100000000010110116116398730160000100640039640039640039640039640039
16020464003847940000106006947479865251601001001600001001600005002287689516400190640038640038619790036199961601002001600002004800006400386400381116020110099100100160000100100001010110116116398730160000100640039640039640039640039640039
1602046400384795000000930072647986525160100100160000100160000500228768950640019064003864003861979003619996160100200160000200480000640038640038111602011009910010016000010000237000010110116116398730160000100640039640039640039640039640666
1602046401854800100113394000726479847251601001001600001001600005002287689506400190640038640038619790036199961601002001600002004800006400386400381116020110099100100160000100000000010110116116398730160000100640039640039640039640039640039
1602046400384794000000000726479865251601001001600001001600005002287689506400190640038640038619790036199961601002001600002004800006400386400861116020110099100100160000100000000010110116116398730160000100640039640039640039640039640039
1602056400384794000000000726479865251601001001600001001600005002287689506400190640038640038619790036199961601822001600002004800006400386400381116020110099100100160000100009000010110116116398730160000100640039640039640039640077640039
160204640038479400000000061479865251601001001600001001600005002287716616400190640038640038619790036199961601002001600002004800006400386400381116020210099100100160000100000000010110016116398730160000100640039640039640039640039640039
1602046400384794000000000747479865251601001001600001001600005002287689506400190640038640038619790036199961601002001600722004800006400386400382116020110099100100160000100000000010110117116398730160000100640039640039640039640039640039
1602046400384794000000000726479865251601001001600001001600005002287689516400190640038640038619790036200321601002001600002004800006400386400381116020110099100100160000100000000010110116116398730160000100640039640039640039640039640039

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 4.0002

retire (01) | cycle (02) | 03 | 04 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | a9 | ac | c2 | cd | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600246400384794000000001337479865251600161016000010160000502287689511064001906400386400386198120362001816001020160000204800006400386400862116002110910101600001000000010022331042161413722639873016161316000010640039640039640039640039640039
1600246400384794101000002061479865251600101016000010160000502287689511064001906400386400386198120362001816001020160068204802046400386400381116002110910101600001000000110022331017161112534639873016161316000010640039640039640039640039640039
160024640038479410100000781479865251600101016000010160000502287689511064001906400386400386198120362001816001020160000204800006400386400381116002110910101600001008000010023331125181111728639873016161316000010640039640039640039640231640087
160024640038479410100000732479865251600101016000010160000502287689511064001936400386400386198123362001816001020160000204800006400386400381116002110910101600001000000010023331126161112624639873016162616000010640039640039640039640039640039
1600246400384795101000120116479865251600101016000010160000502287689511064001906400386400386198120362001816001020160000204800006400386400381116002110910101600001000000010022331025161111625639873016161316000010640039640039640079640039640039
160025640038479400000000732479865251600101016000010160000502287716511064001906400386400386198120362001816001020160000204800006400386400381116002210910101600001002000010023641132161112228639873232162616000010640039640232640039640039640232
160024640038479410104000781479865251600351016000010160000502287689511064016306400386402296198870362015016001020160000204806846400386400384116002110910101600001001034010023311126161111824639873116161316000010640039640232640039640231640039
1600246400384794101004007814798651041600101016002410160000502287770211064016306400386400386198120362001816001020160000204800006400386400385116002110910101600001000056670010084331126161212225639873016161316000010640039640232640231640039640039
160024640231479411100000781479865251600101016000010160160502287689501064001906400386400386198690362001816001020160000204806786400386400385116002110910101600001000000010083331125161112627639873016161316000010640039640039640039640039640039
160024640038479510100000781479865251600101116000010160000502287689511064001906400386400386198120362015116001020160000204800006400386400381116002110910101600001000000010103331128161222716639873116161316000010640039640039640232640039640039