Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SQRDMLAH (by element, H)

Test 1: uops

Code:

  sqrdmlah h0, h1, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 08 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
100430372300612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372300612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
1004303723012612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372300612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372200612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372200612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
1004303722006125482510001000100039831330183037303724153289510001000300030373037111001100014073116112630100030383038303830383038
100430372300612548251000100010003983133018303730372415328951000100030003037303711100110000973124112630100030383038303830383038
100430372309612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372300612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  sqrdmlah h0, h1, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | a9 | ac | c2 | cf | d0 | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102043003722500000000612954825101001001000010010000500427731313001803003730037282653287451010020010000200300003003730037111020110099100100100001000000007101051622296340100001003003830038300383003830038
102043003722500000000612954825101001091000010010000500427731313001803003730037282653287451010020010000200300003003730037111020110099100100100001000020607101021622296340100001003003830038300383003830038
102043003722500000000612954825101001001000010010000500427731313001803003730037282653287451010020010000200300003003730037111020110099100100100001000000007101021632296340100001003003830038300383003830038
1020430037224000000006129548251010010010000100100005004277313130018330037300372826532874510100200100002003000030037300371110201100991001001000010000701092007101021622296340100001003003830038300383003830038
102043003722500000000612954825101001001000010010000500427731313001803003730037282653287451010020010000200300003003730037111020110099100100100001000000007101021622296340100001003003830038300383003830038
1020430037225000000002352954825101001001000010010000500427731313001803003730037282653287451010020010000200300003003730037111020110099100100100001000000007101021622296340100001003003830038300383003830038
102043003722500000000612954825101001001000010010000500427731313001803003730037282653287451010020010000200300003003730037111020110099100100100001000000007101021622296340100001003003830038300383003830038
102043003722400000000612954825101001001000010010000500427731313001803003730037282653287451010020010000200300003003730037111020110099100100100001000010007101021622296340100001003003830038300383003830038
1020430037233000006900612954825101001001000010010000500427731313001803003730037282653287451010020010000200300003003730037111020110099100100100001000000307121021622296340100001003003830038300383003830038
102043003722500000000612954825101001001000010010000500427731313001803003730037282653287451010020010000200300003003730037111020110099100100100001000000007101021623296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037225612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010032640316222963010000103003830038300383003830038
1002430037225612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010130640216222963010000103003830038300383003830038
1002430037225612954825100101010000101000050427731303001830037300372828732878710159201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000662216222963010000103003830038300383003830038
100243003722520592954825100101010008101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010600640216222963010000103003830038300383003830038
1002430037225612954825100101010000121000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010030640216222963010000103003830038300383003830038
1002430037224612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010030640216222963010000103003830038300383003830038
1002430037225612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010230640216222963010000103003830038300383003830038
1002430037225612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010100640216222963010000103003830038300383003830038
10024300372253462954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010130640216222963010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  sqrdmlah h0, h0, v1.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102043003722506129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
102043003722406629548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
102043003722506129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
102043003722506129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100001071011611296340100001003003830038300383003830038
10204300372256636129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
102043003722506129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
102043003722506129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
102043003722506129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
102043003722506129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100001071011611296340100001003003830038300383003830038
102043003722506129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 09 | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037225000006129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000006129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037224000006129548251001010100001010000504277313130018300373008428287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000006129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000101006402162229630010000103003830038300383003830038
1002430037225000006129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000006129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037224000006129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037224000006129548251001010100001010000504277313030018300373003728287328767100102210000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000006129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229703010000103003830038300383003830038
1002430037225000009429548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  sqrdmlah h0, h1, v0.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 0b | 18 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204300372250000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000295171021611296340100001003003830038300383003830038
10204300372250000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372240000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250000822954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372240000612954825101001001000010010000500427731303001830037300372826532874510100200100002043000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372240000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
102043003722500605362954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000371011610296340100001003003830038300383003830038
10204300372250000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037225000000612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225000000612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000640216222976910000103003830038300383003830038
1002430037224000000612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
10024300372250001322640612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225000000612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225000000612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225000000612954825100101010000101000050427731303006530037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225000000612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225000000612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225000000612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  sqrdmlah h0, h8, v9.h[1]
  movi v1.16b, 0
  sqrdmlah h1, h8, v9.h[1]
  movi v2.16b, 0
  sqrdmlah h2, h8, v9.h[1]
  movi v3.16b, 0
  sqrdmlah h3, h8, v9.h[1]
  movi v4.16b, 0
  sqrdmlah h4, h8, v9.h[1]
  movi v5.16b, 0
  sqrdmlah h5, h8, v9.h[1]
  movi v6.16b, 0
  sqrdmlah h6, h8, v9.h[1]
  movi v7.16b, 0
  sqrdmlah h7, h8, v9.h[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204200901501692580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000010111116112006101600001002006520065200652006520065
16020420064150392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000010111116112006101600001002006520065200652006520065
16020420064151392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000010111116112006101600001002006520065200652006520065
1602042006415011742580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000010111116112006101600001002006520065200652006520065
16020420064150812580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000010111116112006101600001002006520065200652006520065
16020420064150392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160202100991001001600001000010111116112006101600001002006520065200652006520065
16020420064150392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001001310111116112006101600001002006520065200652006520065
16020420064150392580100100800001008031150064000002004520064200643228010020080000200240000200642006411160201100991001001600001000010111116112006101600001002006520065200652006520065
16020420064150392580100100800001008012850064000002004520064200643228010020080000200240000200642006411160201100991001001600001000010111116112006101600001002006520065200652006520065
16020420064151392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000010111116112006101600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024200591500452780012128000012800006264000011200322005120051322800122080000202400002005120051111600211091010160000100010027311625211632004824332160000102005220052200522005220052
160024200511500452780012128000012800006264000001200322006020051322800122080000202400002005120051111600211091010160000100010025311234211242004823121160000102005220052200522005220052
160024200511500452780012128000012800006264000011200322005120051322800122080000202400002005120145111600211091010160000100010027311225211462004823441160000102006120052200522005220052
160024200511500452780012128000012800006264000011200322005120051322800122080000202400002005120051111600211091010160000100010027311225211422004822771160000102005220061200522005220052
16002420051150045278001212800001280000626400001120041200512005132280012208000020240000200512005111160021109101016000010001002731132521142200482402160000102005220061200522006120061
160024200511510150278001212800001280000626400000120041200602005132280012208000020240000200512006011160021109101016000010001002562123422124200572201160000102005220052200522005220052
16002420051150045298001212800001280000626400001120032200512005132280012208000020240000200512006011160021109101016000010001002531142522143200482401160000102005220052200522006120052
16002420051151087278001212800001280000626400001120032200512005132280012208000020240000200512005111160021109101016000010001002531142521165200482201160000102005220052200522005220052
16002420060150045278001212800001280000626400001120032200512005132280012208000020240000200512005111160021109101016000010001002731142521164200572201160000102005220052200522005220052
16002420051150045278001212800001280000626400001120032200512005132280012208000020240000200512005111160021109101016000010001002531142521142200482201160000102005220052200522005220052

Test 6: throughput

Count: 12

Code:

  sqrdmlah h0, h12, v13.h[1]
  sqrdmlah h1, h12, v13.h[1]
  sqrdmlah h2, h12, v13.h[1]
  sqrdmlah h3, h12, v13.h[1]
  sqrdmlah h4, h12, v13.h[1]
  sqrdmlah h5, h12, v13.h[1]
  sqrdmlah h6, h12, v13.h[1]
  sqrdmlah h7, h12, v13.h[1]
  sqrdmlah h8, h12, v13.h[1]
  sqrdmlah h9, h12, v13.h[1]
  sqrdmlah h10, h12, v13.h[1]
  sqrdmlah h11, h12, v13.h[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 08 | 0b | 19 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1202043005922500000041025120100100120000100120000500960000030020300393174814973314997120100200120000200360000300413004111120201100991001001200001000007610116113174501200001003004031749300403174930040
1202043003923700000041025120118100120018100120000500960000130020300393003914973316706120100200120000200360000300393003911120201100991001001200001000007610116113003701200001003004031749300403174930040
120204300392380000011209025120100100120000100120000500439952403172930040300391497331499712010020012000020036000030042309221112020110099100100120000100068127610116113174501200001003004031749300403004130040
1202043003923800000041671325120118100120018100120000500960000130020300393174816653316706120100200120000200360000300393101011120201100991001001200001000007610116113003601200001003174930040317493004030041
1202043174822500000041025120118100120001100120000500960000130020300393004016653316706120100200120000200360000300393003911120201100991001001200001000007610116113174501200001003004031749300403004130040
1202043003923700000041025120118100120018100120000500960000030020300393174814973314998120100200120000200360000300403175011120201100991001001200001000007610116113003601200001003004031749300403174930040
120204300402250000018610251201001001200001001200005004399524030021300403003914973314997120100200120000200360000300393003911120201100991001001200001000007610116113174501200001003004030041300403174930040
1202043003923800000041671325120118100120018100120000500960000030020300393174816653314998120100200120000200360000300413003911120201100991001001200001000007610116113003701200001003174930040317493004031749
1202043003922500000061025120100100120000100120000500990000031729317623003914973314997120100200120000200360000300393003911120201100991001001200001000007610116113003601200001003174930040317493004031749
12020431748225000001742025120100100120000100120000500960000130020300393174816653314998120100200120000200360000300393003911120201100991001001200001000007610116113174501200001003004031749300403004031749

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 08 | 18 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
120024300402250000016434912512001110120001101200005099000003002031750300391499631501912001020120000203600003175030039111200211091010120000100075207161155300360120000103004030040300403004030040
1200243003922500001610251200101012000010120000509600000300203003930039149963150191200102012000020360000300393003911120021109101012000010007522516955300362120000103004030040300403004030040
1200243003922500000820251200101012000010120000509900000300203003930040166773150201200102012000020360000300393003911120021109101012000010007520516855300360120000103004030040300403004030040
120024300392250000014702512001010120000101200006096000003002030039300391499631501912001020120000203600003004030039111200211091010120000100075205161055300360120000103004130040300433004030040
1200243003922500000400251200111012003510120000509600000300203003930039149963150191200102012000020360000300393175011120021109101012000010007520516953300360120000103004130040317513004030040
12002430039225000035610251200101012000010120000509900000317313175030039149963150191200102012000020360000300403003911120021109101012000010007520516955300360120000103004030040300403004030040
12002430039238000017190251200101012000110120000609600000300203003930039149967150981202222012031420361884315673061161120021109101012000010007522416953300360120000103004030040300403004030040
1200243003922400000400251200111012000110120000509600000300213175030039149963167181200102012000020360000300393003911120021109101012000010007520316953300360120000103175130040300433004130040
120024300392380000040025120010101200001012000050439406103173130039300401667731501912001020120000203600003003931750111200211091010120000100075206161055300360120000103175130040317513004031751
1200243003922500000400251200621012003410120000509600000300203003930039149963150191200102012000020360000300393003911120021109101012000010007520516935317470120000103004031751300403004130040