Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SHA1M

Test 1: uops

Code:

  sha1m q0, s1, v2.4s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 50 | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
100440383096128650251000100010001398954019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
100440383106128654022251000100010001398954019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
1004403830876128654022251000100010001398954019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
100440383006128654022251000100010001398954019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
1004403830010328650251000100010001398954019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
100440383006128650251000100010001398954019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
100440383006128650251000100010001398954019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
100440383006128650251000100010001398954019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
100440383006128650251000100010001398954019403840383690338961000100030004038403811100110000073216223873100040394039403940394039
100440383006128650251000100010001398954019403840383690338961000100030004038403811100110000073216223873100040394039403940394039

Test 2: Latency 1->1

Code:

  sha1m q0, s1, v2.4s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0038

retire (01) | cycle (02) | 03 | 07 | 0a | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a7 | a8 | a9 | ac | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204400383000000612986525101001001000010010000500142689504001940086400383854033874610100200100002003000040038400381110201100991001001000010000520071003163339871100001004003940039400394003940039
10204400383000000612986525101001001000010010000500142689514001940038400383854033874610100200100002003000040038400381110201100991001001000010000000071013163339871100001004003940039400394003940039
1020440038299000072629865251010010010000100100005001426895040019400384003838540338746101002001000020030000400384003811102011009910010010000100003000071213163439871100001004003940039400394003940039
10204400383000000612986525101001001000010010000500142689514001940038400383854033874610100200100002003000040038400381110201100991001001000010000000071013163339871100001004003940039400394003940039
10204400383000000612986525101001001000010010000500142689504001940038400383854033874610100200100002003000040038400381110201100991001001000010000200071013163339871100001004003940039400394003940039
1020440038300001470612986525101001001000010010000500142689504001940038400383854033874610100200100002003000040038400381110201100991001001000010020000071013243339871100001004003940039400394003940039
10204400822990000612986525101001001000010010000500142689514001940038400383854033874610100200100002003000040038400381110201100991001001000010000000071013163339871100001004003940039400394003940039
10204400382990000612986525101001001000010010000500142689504001940038400383854033874610100200100002003000040038400381110201100991001001000010000000071013163339871100001004003940039400394003940039
10204400383000001612986525101001001000010010000500142689504001940038400383854033874610100200100002003000040038400381110201100991001001000010000100071213163339871100001004003940039400394003940039
10204400383000000612986525101001001000010010000500142689514001940038400383854033874610100200100002003000040038400381110201100991001001000010000600071013163339871100001004003940039400394003940039

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0038

retire (01) | cycle (02) | 03 | 09 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a7 | a8 | ac | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024400383000061298652510010101000010100005014268951400194003840038385623387681001020100002030000400384003811100211091010100001002960640516333987210000104003940039400394003940039
10024400383000061298652510010101000010100005014268950400194003840038385623387681001020100002030000400384003811100211091010100001003030640316433987210000104003940039400394003940039
1002440038300006129865251001010100001010000501426895040019400384003838562338768100102010000203000040038400381110021109101010000100200640316343987210000104003940039400394003940039
10024400383000072629865251001010100001010000501426895040019400384003838562338768100102010000203000040038400381110021109101010000100200640316343987210000104003940039400394003940039
1002440038299006129865251001010100001010000501426895040019400384003838562338768100102010000203000040038400381110021109101010000100100640316433987210000104003940039400394003940039
1002440038300006129865251001010100001010000501426895040019400384003838562338768100102010000203000040038400381110021109101010000100221260640316533987210000104003940039400394003940039
1002440038299006129865251001010100001010000501426895140019400384003838562338768100102010000203000040038400381110021109101010000100100640316353994610000104003940039400394003940039
1002440038299006129865251001010100001010000501426895040019400384003838562338768100102010000203000040038400381110021109101010000100100640316353987210000104003940039400394003940039
1002440038300006129865251001010100001010000501426895140019400384003838562338768100102010000203000040038400381110021109101010000100100640316343987210000104003940039400394003940039
1002440038300006129865251001010100001010000501426895140019400384003838562338768100102010068203000040038400381110021109101010000100100640316343987210000104003940039400394003940039

Test 3: Latency 1->2

Code:

  sha1m q0, s0, v1.4s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102045003737400613982925101001001000010010000500178625950018500375003748491348745101002001000020030000500375003711102011009910010010000100007101161149822100001005003850038500385003850038
102045003737500613982925101001001000010010000500178625950018500375003748491348745101002001000020030000500375003711102011009910010010000100007101161149822100001005003850038500385003850038
102045003737500823982925101001001000010010000500178625950018500375003748491348745101002001000020030000500375003711102011009910010010000100007101161149822100001005003850038500385003850038
102045003737500613982925101001001000010010000500178625950018500375003748491348745101002001000020030000500375003711102011009910010010000100007101161149822100001005003850038500385003850038
102045003737500613982925101001001000010010000500178625950018500375003748491348745101002001000020030000500375003711102011009910010010000100007101161149822100001005003850038500385003850038
102045003737500613982925101001001000010010000500178625950018500375003748491348745101002001000020030000500375003711102011009910010010000100007101161149822100001005003850038500385003850038
1020450037374001053982925101001001000010010000500178625950018500375003748491348745101002001000020030000500375003711102011009910010010000100007101161149822100001005003850038500385003850038
1020450037375001033982925101001001000010010000500178625950018500375003748491348745101002001000020030000500375003711102011009910010010000100007101161149822100001005003850038500385003850038
102045003737500613982925101001001000010010000500178625950018500375003748491348745101002001000020030000500375003711102011009910010010000100007101161149822100001005003850038500385003850038
102045003737500613982925101001001000010010000500178625950018500375003748491348745101002001000020030000500375003711102011009910010010000100007101161149822100001005003850038500385003850038

1000 unrolls and 10 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002450037375000061398292510010101000010100005017862590500180500375003748513034876710010201000020300005003750037111002110910101000010000640216224982410000105003850038500385003850038
1002450037374000061398292510010101000010100005017862591500180500375003748513034876710010201000020300005003750037111002110910101000010003640216224982410000105003850038500385003850038
1002450037375000061398292510010101000010100005017862590500183500375003748513034876710010201000020300005003750037111002110910101000010000640216224982410000105003850038500385003850038
10024500373750009107398292510010101000010100005017862591500180500375003748513034876710010201000020300005003750037111002110910101000010000640216224982410000105003850038500385003850038
1002450037375000061398292510010101000010100005017862591500180500375003748513034876710010201000020300005003750037111002110910101000010000640216224982410000105003850038500385003850038
1002450037375000061398292510010101000010100005017862590500180500375003748513034876710010201000020300005003750037111002110910101000010000640316234982410000105003850038500385003850038
100245003737500012103398292510010101000010100005017862590500180500375003748513034876710010201000020300005003750037111002110910101000010003640216224982410000105008550038500385003850038
10024500373750110631398292510010101001012100005017862590500180500375003748513034876710010201000020300005003750037111002110910101000010000640216324982410000105003850038500385003850038
10024500373750001261398292510010101000010100005017862591500180500375003748513034876710010201000020300005003750037111002110910101000010003640216224982410000105003850038500385003850038
1002450037375000396541398292510010101000010100005017862590500180500375003748513034876710010201000020300005003750037111002110910101000010000640216234982410000105003850038500385003850038

Test 4: Latency 1->3

Code:

  sha1m q0, s1, v0.4s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 18 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102045003737400613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000000007101161149822100001005003850038500385003850038
102045003737500613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050230500371110201100991001001000010000000007101161149822100001005003850038500385003850038
102045003737500613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000000007101161149822100001005003850038500385003850038
102045003737500613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000000007101161149822100001005003850038500385003850038
102045003737500613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000000007101161149822100001005003850085500385003850038
1020450037375007263982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000000007101161149822100001005003850038500385003850038
102045003737400613982925101001001000010010000500178625905001850037500374850934874510100200100002003000050037500371110201100991001001000010030430007101161149822100001005003850038500385003850038
102045003737400613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000000007101161149822100001005003850038500385003850038
102045003737500613982925101001001000010010000500178625915001850037500374849134874510100200100002003000050037500371110201100991001001000010000000007101161149822100001005003850038500385003850038
102045003737500613982925101001001000010010000500178625905001850037500374849134874510100200100002003000050037500371110201100991001001000010000000007101161149822100001005003850038500385008550133

1000 unrolls and 10 iterations

Result (median cycles for code): 5.0037

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100245003737400006139829251001010100001010000501786259050018500375003748513348767100102010000203000050037500371110021109101010000100640006402162249824010000105003850038500385003850038
10024500373740000613982925100101010000101000050178625905001850037500374851334876710010201000020300005003750037111002110910101000010000006402162249824010000105003850038500385003850038
10024500373750000613982925100101010000101000050178625905001850037500374851334876710010201000020300005022750037111002110910101000010007006402162249824010000105003850038500385003850038
10024500373750000613982925100101010004101000050178640505001850037500374851334876710010201000020300005003750037111002110910101000010000006402162249824010000105003850038500385003850038
10024500373750000613982925100101010000101000050178625905001850037500374851334876710010201000020300005003750037111002110910101000010000006402162249824010000105003850038500385003850038
10024500373750030613982925100101010000101000050178625905001850037500374851334876710010201000020300005003750037111002110910101000010000006402162449824010000105003850038500385003850038
10024500373750000613982925100101010000101000050178625905001850037500374851334876710010201000020300005003750037111002110910101000010000606402162249824010000105003850038500385003850038
10024500373750000613982925100101010000101000050178625905001850037500374851334876710010201000020300005022750037111002110910101000010000006402162249824010000105003850038500385003850038
10024500373750000613982925100101010000101000050178625905001850037500374851334876710010201000020300005003750037111002110910101000010010006402162249824010000105003850038500385003850038
100245003737500002513982925100101010000101000050178625905001850037500374851334876710010201000020300005003750037111002110910101000010000006402162249824010000105003850038500385003850038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  sha1m q0, s8, v9.4s
  movi v1.16b, 0
  sha1m q1, s8, v9.4s
  movi v2.16b, 0
  sha1m q2, s8, v9.4s
  movi v3.16b, 0
  sha1m q3, s8, v9.4s
  movi v4.16b, 0
  sha1m q4, s8, v9.4s
  movi v5.16b, 0
  sha1m q5, s8, v9.4s
  movi v6.16b, 0
  sha1m q6, s8, v9.4s
  movi v7.16b, 0
  sha1m q7, s8, v9.4s
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 4.0005

retire (01) | cycle (02) | 03 | 07 | 0a | 0b | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | a9 | ac | c2 | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204320038239700000270923986525801001008000010080000500114368951320019320038320038299790329999680100200800002002400003200383200381116020110099100100160000100000000010110416113198860160000100320039320039320039320039320039
1602043200382397000006123986525801001008000010080000500114368951320019320038320038299790329999680100200800002002400003200383200381116020110099100100160000100000000010110116113198860160000100320039320039320039320039320039
1602043200382397000006123986525801001008000010080000500114368950320019320038320038299790329999680100200800002002400003200383200381116020110099100100160000100000000010110116113198860160000100320039320039320039320039320039
160204320038239700000612398652580100100800001008000050011436895132001932003832003829979032999968010020080000200240000320096320038111602011009910010016000010010322850010110116113198860160000100320039320039320097320039320039
1602043200382398000004412398652580100100800001008000050011436895032001932003832003829979032999968010020080000200240000320038320038111602011009910010016000010000003690010110116113198860160000100320039320039320039320039320039
160204320038239800000203623986525801001008000010080000500114368951320019320038320038299790329999680100200800002002400003200383200381116020110099100100160000100000000010110117113198860160000100320039320039320096320039320039
1602043200382398000007262398652580100100800001008000050011436895132001932003832003829979032999968010020080000200240000320038320038111602011009910010016000010000004050010110116113198860160000100320039320097320039320039320039
160204320096239700000212239865258010010080000100800005001143689513200193200383200382997903299996801002008000020024000032003832003811160201100991001001600001000092000010110116113198860160000100320039320039320039320039320039
1602043200382397000006123986525801001008000010080000500114368951320019320038320038299826329999680100200800002002400003200383200951116020110099100100160000100000000010110116113198860160000100320039320039320039320039320039
160204320038239700000612398652580100106800001008000050011436895132001932003832009529979032999968010020080000200240000320038320038211602011009910010016000010000181000010110116113198860160000100320039320039320039320039320039

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 4.0005

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0b | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | a9 | ac | c5 | cd | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024320376240621110198008372398652580010108000010800005011436895113200193200383200382998123300018800102080000202400003200383200381116002110910101600001000000010022311231611116631988721191516000010320039320039320039320039320039
16002432003823980000000067239835258001010800001080000501143689511320019320038320038299812330001880010208000020240000320038320038111600211091010160000100000001009231161611161631988721191516000010320039320039320039320039320039
16002432003823970000000067239865258001010800001080000501143689511320019320038320038299812330001880010208000020240000320038320038111600211091010160000100003001002231161611161631988721191516000010320039320039320232320039320039
1600243200382397000003007322398652580010108000010800005011436895113200193200383200382998123300018800102080000202400003200383200381116002110910101600001000030010022311616111161731988721191516000010320039320039320039320039320137
1600243200382398000000006723986525800101080000108000050114368951132001932003832003829983633000188001020800002024000032003832013511160021109101016000010000000100223111616111171731988721191516000010320039320039320039320039320039
16002432003823980000000067239865258001010800001080000501143689511320019320038320038299812330001880010208000020240000320038320038111600211091010160000100103001002231161611116631988721191516000010320039320039320039320089320039
1600243200382397000000007322398652580010108000010800005011437165113200193200383200382998123300018800502080000202400003200383200381116002110910101600001000000010022311161611161631988721381516000010320039320039320039320039320039
160024320038239700000000672398652580010108000010800005011436895113200193200383200382998123300018800102080000202400003200383200381116002110910101600001000000010022311161614116631988721191516000010320039320039320039320039320039
1600243200382397000006007322398652580010108000010800005011436895113200193200383200382998123300018800102080000202400003200383200381116002110910101600001001700001002231161611117731988721191516000010320039320039320039320039320039
1600243200382397000000006723986525800101080000108000050114368951132001932003832003829981233000188001020800002024018032003832003811160021109101016000010000000100223111616111121531988721191516000010320039320039320039320087320039

Test 6: throughput

Count: 16

Code:

  sha1m q0, s16, v17.4s
  sha1m q1, s16, v17.4s
  sha1m q2, s16, v17.4s
  sha1m q3, s16, v17.4s
  sha1m q4, s16, v17.4s
  sha1m q5, s16, v17.4s
  sha1m q6, s16, v17.4s
  sha1m q7, s16, v17.4s
  sha1m q8, s16, v17.4s
  sha1m q9, s16, v17.4s
  sha1m q10, s16, v17.4s
  sha1m q11, s16, v17.4s
  sha1m q12, s16, v17.4s
  sha1m q13, s16, v17.4s
  sha1m q14, s16, v17.4s
  sha1m q15, s16, v17.4s
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 4.0002

retire (01) | cycle (02) | 03 | 04 | 09 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d2 | icache miss (d3) | d5 | d6 | d9 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602046400384794000006147986525160129100160000100160000590228768951640019064003864003861979003619996160100200160000200480000640038640075111602011009910010016000010000001018900001011000916099639873000160000100640039640039640039640039640085
1602046400384864000120072647986543160100100160000100160000500228768951640019064003864003861979003619996160100200160000200480000640038640038111602011009910010016000010000006500000010110009160124639873000160000100640087640039640039640039640039
160204640038479400000726479847251601001001600001001600005002287689506400190640038640038619790036199961601002001600002004800006400386400381116020110099100100160000100000000201000010110001316097639873000160000100640039640039640039640039640039
160204640134479410000726479865251601061001600001001600005002287689506400190640038640038619790036199961601002001600002004800006400386400381116020110099100100160000100000000000001011000716045639873000160000100640039640039640135640039640039
160204640038479400000749479865251601001001600001001600005002287689506400190640038640038619790036199961601002001600002004800006400386400381116020110099100100160000100000000000001011000192020810639873000160000100640079640039640039640039640039
1602046400384794000352061479865251601001001600001001600005652287689506400190640086640038619790036199961601002001600002004802166400386400381116020110099100100160000100000010000001011000916049639873000160000100640039640039640039640039640039
160204640038479400000180047986537160100100160000100160000500228768950640019064003864003861979003619996160100200160000200480000640076640038111602011009910010016000010000001030000101100010160996398731400160000100640039640039640039640039640039
1602046400384795000001844479865251601001001600001001600005002287689506400190640086640038619790036199961601002001600002004800006400866400861116020110099100100160000100000000000001011001816094639873000160000100640039640039640039640039640039
160204640038479500000726479865251601001001600001001600005002287689506400190640038640038619790036199961601002001600002004800006400386400382116020110099100100160000100000000000001011020916049639873000160000100640039640039640039640039640039
160204640038479400000726479865251601001001600001001600005002287703916400190640038640038619790036199961601002001600002004800006400386400381116020110099100100160000100000000000101011020716049639873000160000100640039640039640039640039640039

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 4.0002

retire (01) | cycle (02) | 03 | 04 | 07 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600246400384794100000000076547986525160017111600001016000050228768951156400190640038640086619812036200181600102016000020480000640076640038111600211091010160000100000000001002285281611174639873021211516000010640039640039640039640039640039
16002464003847950000000000637479829251600101016000010160000502287689511564001906400386400386198120362001816001020160000204800006400386400381116002110910101600001000000000010022114141612164639873042423016000010640039640039640039640039640039
160024640038479400000001500738479865251600101016000010160000502287689501564001906400386400386198120362001816001020160000204800006400386400381116002110910101600001000000000110024115141622235639873042213016000010640039640039640039640039640039
16002464003847950000000000738479865251600101016000010160000502287689501564005606400386400386198120362001816001020160000204800006400386400381116002110910101600001000065900001002285141611155639873021211516000010640087640039640039640039640077
1600246400384794000000000073247986525160010101600001016000050228768951106400190640086640038619812036200181600102016000020480000640038640038111600211091010160000100000000001002285171611259639873021211516000010640080640039640039640039640039
1600246400384794000000000025747986525160010101600001116000050228768951156400190640038640038619812036200181600102016000020480000640038640038111600211091010160000100000000001002285141611177639873021211516000010640039640039640039640039640039
1600246400384795000000000073247986525160010101600001016000050228768951156400190640038640038619812036200181600102016000020480204640038640038311600211091010160000100000852620001002285141611135639873021211516000010640620640039640039640234640039
1600246400384794000000000073247986525160010101600001016000050228768951156400193640038640038619812036200181600102016000020480000640038640038111600221091010160000100000000001002285171651176639873021211516000010640039640039640039640039640039
1600246400384794000000000044747986525160010101600001016000050228768951156400190640038640038619812036200181600102016006820480000640038640038111600211091010160000100000600001002235161611187639873021213016000010640039640039640039640039640039
1600246400384794000000090075347986525160010101600001016000050228768951156400560640038640038619812036200531600102016000020480000640038640038111600211091010160000100000000001002285171611178639873021211516000010640039640039640039640039640083