Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SQRDMLAH (by element, 4H)

Test 1: uops

Code:

  sqrdmlah v0.4h, v1.4h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)033f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a8a9accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
1004303723145254825100010001000398313030183037303724153289510001000300030373037111001100010373116112630100030383038303830383038
1004303722251254825100010001000398313030183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
100430372361254825100010001000398313030183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
100430372361254825100010001000398313030183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
100430372261254825100010001000398313130183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
100430372261254825100010001000398313030183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
100430372361254825100010001000398313030183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
100430372361254825100010001000398313030183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
100430372261254825100010001000398313030183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
1004303722103254825100010001000398313030183037303724153289510001000300030373037111001100010073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  sqrdmlah v0.4h, v1.4h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)030b1e3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)6061696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a6a8acc5cfd0d2d5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204300372250006129548251010010010000100100005004277313053001830037300372826532874510100200100002003000030037300371110201100991001001000010000000710632162229634100001003003830038300383003830038
10204300372250006129548251010010010000100100005004277313053001830037300372826932874510100200100002003000030037300371110201100991001001000010000000710103162229634100001003003830038300383003830038
10204300372250006129548251010010010000100100005004277313103001830037300372826532874510100200100002003000030037300371110201100991001001000010000000710632162229634100001003003830038300383003830038
10204300372250006129548251010010010000100100005004277313103001830037300372826532874510100200100002003000030037300371110201100991001001000010000000710632162229634100001003003830038300383003830038
10204300372250006129548251010010010000100100005004277313053001830037300372826532874510100200100002003000030037300371110201100991001001000010000300710632162329634100001003003830038300383003830038
10204300372250006129548251010010010000100100005004277313103001830037300372826532874510100200100002003000030037300371110201100991001001000010000000710632162229634100001003003830038300383003830083
10204300372250006129548251010010010000100100005004277313053001830085300372826532874510100200100002003000030037300371110201100991001001000010000000710632162229634100001003003830038300383003830038
10204300372250006129548251010010010000100100005004277313053001830037300372826532874510100200100002003000030037300371110201100991001001000010000000710602162229634100001003003830038300383003830038
10204300372250006129548251010010010000100100005004277313053001830037300372826532874510100200100002003000030037300371110201100991001001000010000030710632162229634100001003003830038300383003830038
10204300372250006129548251010010010000100100005004277313053001830037300372826532874510100200100002003000030037300371110201100991001001000010000000710102162229634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)030708090b18191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a8a9acc2c5cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100243003722500000000166295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010700000006402162229630010000103003830038300383003830038
10024300372250000000061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402162229630010000103003830038300383003830038
10024300372330000000061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402162229630010000103003830038300383003830038
10024300372250000000061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402163229630010000103037030396303673037030371
10024303682270100778045303326294851611006812100561410931504286812302703041430366283183328900110592411136223394230179303698110021109101010000100231219470407924746629944010000103037030370303683037030133
1002430357228000010144881201294941041004412100321210542614284098302343031830310283082928824107552010823243329430217301796110021109101010000100001013923007304167229738010000103037030604303693041730368
1002430404235001113111584792863429440234100801410088181119261428952630342303673041328320602896511894241103622358893046130465121100211091010100001020004277100083341018730028210000103046430462303213036830414
1002430274237310156132361661295482510028101000010104477742827413005430593304162833059289701090626113182434512306533051214110021109101010000100020028080008314486329630010000103003830038300383003830038
10024300742250000000061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006403162229630010000103003830038300383003830038
10024300372250000000061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402162229630010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  sqrdmlah v0.4h, v0.4h, v1.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)030b18191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a6a8a9acc5cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1020430037225000120612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071011611296340100001003003830038300383003830038
1020430037225000690612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000103071011611296340100001003003830038300383003830038
1020430037225000001452954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000003071011611296340100001003003830038300383008530038
10204300372250003301032954825101001001000010010000500427731303001830037300372826532874510100202100002003000030037300371110201100991001001000010000000071011611296340100001003003830038300383003830038
1020430037225000150612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071011611296340100001003003830038300383003830038
1020430037225000150612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071011611296340100001003003830038300383003830038
102043003722500090612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071011611296340100001003003830038300383003830038
10204300372250001804052954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071011611296340100001003003830038300383003830038
1020430037225000120612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071011611296340100001003003830038300383003830038
10204300372250001503462954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)031e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6acc5cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024300372250612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100180640216222963010000103003830038300383003830038
1002430037225061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037224061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037224061295482510018101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037224071295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  sqrdmlah v0.4h, v1.4h, v0.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)030b181e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102043003722500906129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
10204300372250025806129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
10204300372250018606129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
1020430037225001806129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
1020430037225001506129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
1020430037225001506129548251010010010000100100005004277313300183003730037282653287451010020010000202300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
10204300372250016506129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
1020430037225005106129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
102043003722500240018929548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
1020430037225006072629548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)03070a0b1e1f3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8a9accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024300372241100021122954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000644101610112963010000103003830038300383003830038
10024300372251104202682954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000644101610102963010000103003830038300383003830038
100243003722511036002682954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000644101610102963010000103003830038300383003830038
100243008422511047702682954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000644111610102963010000103003830038300383003830038
10024300372251102702682954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000644101611102963010000103003830038300383003830038
1002430037225110480268295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000064411161162963010000103003830038300383003830038
10024300372251101740268295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000064481610102963010000103003830038300383003830038
100243003722511029402682954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000644101610102963010000103003830038300383003830038
100243003722511042902682954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000644111611112963010000103003830038300383003830038
1002430037225110002682954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000644101610102963010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  sqrdmlah v0.4h, v8.4h, v9.h[1]
  movi v1.16b, 0
  sqrdmlah v1.4h, v8.4h, v9.h[1]
  movi v2.16b, 0
  sqrdmlah v2.4h, v8.4h, v9.h[1]
  movi v3.16b, 0
  sqrdmlah v3.4h, v8.4h, v9.h[1]
  movi v4.16b, 0
  sqrdmlah v4.4h, v8.4h, v9.h[1]
  movi v5.16b, 0
  sqrdmlah v5.4h, v8.4h, v9.h[1]
  movi v6.16b, 0
  sqrdmlah v6.4h, v8.4h, v9.h[1]
  movi v7.16b, 0
  sqrdmlah v7.4h, v8.4h, v9.h[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01)cycle (02)031e1f3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a8a9acbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1602042008915012041925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010000000101121116222006101600001002006520065200652006520065
1602042006415000392580100100800001008000050064000012004520064200643228010020080000200240000200642006411160201100991001001600001000000010112216222006101600001002006520065200652006520065
160204200641500176392580100100800001008000050064000012004520064200643228010020080000200240000200642006411160201100991001001600001000500010112216222006101600001002006520065200652006520065
1602042006415000392580100100800001008000050064000012004520064200643228010020080000200240000200642006411160201100991001001600001000000010112216222006101600001002006520065200652006520065
1602042006415100392580100100800001008000050064000012004520064200643228010020080000200240000200642006411160201100991001001600001000000010112416222006101600001002006520065200652006520065
1602042006415000392580100100800001008000050064000012004520064200643228010020080000200240000200642006411160201100991001001600001000000010112216222006101600001002006520065200652006520065
16020420064151120392580100100800001008000050064000012004520064200643228010020080000200240000200642006411160201100991001001600001000000010112216222006101600001002006520065200652006520065
16020420064150150392580100100800001008000050064000012004520064200643228010020080414200240000201452022611160201100991001001600001000000010112216222006101600001002006520065200652006520065
160204200641515220392580100100800001008000055464000012004520064200643228010020080000200240000200642014611160201100991001001600001000000010112216222006101600001002006520065200652006520065
16020420064151150392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000000010112216222006101600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01)cycle (02)031e1f3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f60696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a8a9accfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)ea? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16002420067150120512580012128000012800006264000011200312004620046322800122080000202400002004620046111600211091010160000100100100303118202124220043215160000102005120051200512005120051
16002420046150120452580012128000012800006264000011200272004620045322800122080000202400002004620046111600211091010160000100006001002531122012115220043215160000102004720047200472011820047
16002420046150240662580012128000012800006264000010200272004620046322800122080000202400002004620046111600211091010160000100000100273114202112420043215160000102004720047200472004720047
16002420046150270452580012128000012800006264000011200272004620046322800122080000202400002004620046111600211091010160000100000100273114202114220043215160000102004720047200472004720047
1600242004615000452580012128000012800006264000011200272004620046322800122080000202400002004620046111600211091010160000100000100303114202117720043215160000102004720047200472004720047
1600242004615000452580012128000012800006264000011200272005020046322800122080000202400002004620046111600211091010160000100000100253114202112420043215160000102004720047200472004720047
1600242004615000452580012128000012800006264000011200272004620046322800122080000202400002004620046111600211091010160000100000100253113202117320043215160000102004720047200472004720047
1600242004615000452580012128000012800006264000011200272004620046322800122080000202400002004620046111600211091010160000100000100273112202114420043215160000102004720047200472004720047
16002420046150240662580012128000012800006264000011200272004620046322800122080000202400002004620046111600211091010160000100000100313112202114220043215160000102004720047200472004720047
1600242004615000452580012128000012800006264000011200272004620046322800122080000202400002004620046111600211091010160000100000100273112202112420043215160000102004720047200472004720047

Test 6: throughput

Count: 12

Code:

  sqrdmlah v0.4h, v12.4h, v13.h[1]
  sqrdmlah v1.4h, v12.4h, v13.h[1]
  sqrdmlah v2.4h, v12.4h, v13.h[1]
  sqrdmlah v3.4h, v12.4h, v13.h[1]
  sqrdmlah v4.4h, v12.4h, v13.h[1]
  sqrdmlah v5.4h, v12.4h, v13.h[1]
  sqrdmlah v6.4h, v12.4h, v13.h[1]
  sqrdmlah v7.4h, v12.4h, v13.h[1]
  sqrdmlah v8.4h, v12.4h, v13.h[1]
  sqrdmlah v9.4h, v12.4h, v13.h[1]
  sqrdmlah v10.4h, v12.4h, v13.h[1]
  sqrdmlah v11.4h, v12.4h, v13.h[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2503

retire (01)cycle (02)031e1f373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a8acc2branch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
120204300622253300410251201181001200181001200005009600001300203004030039149733149971201002001200002003600003003930040111202011009910010012000010000000761031611300371200001003004031749300403004130040
12020430039238000410251201181001200181001200005009600001300203003930830149733149971201002001200002003600003004030039111202011009910010012000010000000761011611300361200001003174930040317493004031749
120204317482252701863102512010010012000010012000050043995241309913174830039149733149971201002001200002003600003003930040111202011009910010012000010001000761011611300371200001003009130923300403004031749
12020431748224000410251201181001200011001200005009600001300203003931748166533149981201002001200002003600003174830039111202011009910010012000010000020761011611317451200001003004030041300403174930040
120204300392256016102512010010012000010012000050043995241317293004030039149733149971201002001200002003600003003930040111202011009910010012000010000000761011611300361200001003004130040300413004031749
120204317482250004102512010010012000010012000050043995241300213174830039149733149971201002001200002003600003003931748111202011009910010012000010000000761011611300361200001003004130040300413004031749
12020431748225001861025120118100120018100120000500960000130020300393003914973316706120100200120000200360000300393174811120201100991001001200001000011400761011611300371200001003174930040300413004031749
1202043174822500186167132512010110012000110012000050043995241300213004230039149733167061201002001200002003600003003931748111202011009910010012000010000000761011611317451200001003174930040317493004030043
120204317482250004102512010010012000010012000050043995241300203003931748166533167061201002001200002003600003003931748111202011009910010012000010000000761011611317451200001003004031749300403174930040
12020430039238120186102512010010012000010012000050043995241317293003930039149733149971201002001200002003600003003930039111202011009910010012000010000000761011611300361200001003174930040317493004031749

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01)cycle (02)03080b1e373f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a8acc2cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
120024303382250033040251200101012000010120000509600000300203003930039149963150221200102012000020360000300423003911120021109101012000010000075202716121230036120000103004030040300403004030040
1200243003922500151840251200101012000010120000509600000300203003930039149963150191200102012000020360000300393003911120021109101012000010000075201116111331740120000103004030040300403004030040
120024300392250012040251200101012000010120000509600000300203003930039149963150191200102012000020360000300393003911120021109101012000010000075201216131130036120000103004030040300403004030040
12002430039225006040251200101012000010120000509600000300203003930039149963150191200102012000020360000300393003911120021109101012000010000075201416141030036120000103004030040300403004030040
120024300392250015061251200101012000010120000509600001300203003930039149963150191200102012000020360000300393003911120021109101012000010000075201216121230036120000103004030040300403004030040
120024300392250030402512001010120000101200005096000003090330039300391499631501912001020120000203600003003930039111200211091010120000100000752091691230036120000103004030040300403004030040
12002430039225000040251200101012000010120000509600000300203003931010149963150191200102012000020360000300393003911120021109101012000010000075201416141130036120000103004030040300403004030040
12002430039225000040251200101012000010120000509600000300203003930039149963150191200102012000020360000300393003911120021109101012000010000075201216151430036120000103004030040300403004030040
120024300392250057040251200101012000010120000509600000300203003930039149963150191200102012000020360000300393003911120021109101012000010000075201216121230036120000103004030040300403004030040
120024300392250027040251200101012000010120000509600001300203003930039149963150191200102012000020360000317433003911120021109101012000010000075201016121030036120000103004030040300403004030040