Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

UMLAL2 (by element, 2D)

Test 1: uops

Code:

  umlal2 v0.2d, v1.4s, v2.s[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 08 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
1004303723000612548251000100010003983130301830373037241532895100010003000303730371110011000000073116212630100030383038303830383038
1004303722000612548251000100010003983130301830373037241532895100010003000303730371110011000000073116112630100030383038303830383038
1004303723000612548251000100010003983130301830373037241532895100010003000303730371110011000000073116112630100030383038303830383038
1004303723000612548251000100010003983130301830373037241532895100010003000303730371110011000000073116112630100030383038303830383038
1004303723000612548251000100010003983131301830373037241532895100010003000303730371110011000010073116112630100030383038303830383038
1004303722000612548251000100010003983130301830373037241532895100010003000303730371110011000000073116112630100030383038303830383038
1004303722000612548251000100010003983130301830373037241532895100010003000303730371110011000000073116112630100030383038303830383038
1004303723000612548251000100010003983130301830373037241532895100010003000303730371110011000000073116212630100030383038303830383038
10043037230036125482510001000100039831303018303730372415328951000100030003037303711100110000410073116112630100030383038303830383038
1004303723000612548251000100010003983130301830373037241532895100010003000303730371110011000000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  umlal2 v0.2d, v1.4s, v2.s[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 18 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204300372250000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000710121642296340100001003003830038300383003830038
10204300372250200612953925101001001000010010000500427731303001830037300372826532874510100200100002003000030037300862110201100991001001000010000710121633296703100001003013230085300863013330038
10204300372250000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010006710121622296340100001003003830038300383003830038
10204300372250000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010013710131622296340100001003003830038300383003830038
10204300372251000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000712121622296340100001003003830038300383003830038
10204300372250000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000710121622296340100001003003830038300383003830038
10204300372250000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000710121623296340100001003003830038300383003830038
10204300372250000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000710121622296340100001003003830038300383003830038
10204300372250000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000710121622296340100001003003830038300383003830038
10204300372240000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000710121622296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100243003722500000061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000006403163229630010000103003830038300383003830038
1002430037225000000103295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000006402162229630010000103003830038300383003830038
1002430037225000000145295482510010101000010100005042773130300543003730037282873287671001020100002030000300373003711100211091010100001000050285306403162329630010000103003830038300383003830038
100243003722500000061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000006403162429630010000103003830038300383003830038
100243003722500000061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000006403162429630010000103003830038300383003830038
100243003722500190061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000006403162329630010000103003830038300383003830038
100243003722500000061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000006402162229630010000103003830038300383003830038
1002530037225000000726295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000026403164329630010000103003830038300383003830038
100243003722400000061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000006403163229630010000103003830038300383003830038
100243003722500000061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000006403162329630010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  umlal2 v0.2d, v0.4s, v1.s[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 07 | 08 | 0a | 0b | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102043037022741101010110196866192945721910232148100801461134177842908830301983050430517282864828947116372351176720030000300373003711102011009910010010000100037101161129634100001003003830038300383003830038
10204300372250000000010329548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100107101161129634100001003003830038300383003830038
1020430037225000100405010329548251010010010000100100005004277313030018300373003728265328745101002041000020030000300373008421102011009910010010000100028207101161129634100001003003830038300383003830038
1020430037224000000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
10204300372250000000012629548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100107101161129634100001003003830038300383003830038
10204300372250000000061295482510100100100001001000050042773130300183003730037282652128745104112001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
102043003722500000011406129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
102043003722500000011106129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
10204300372250000009606129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
10204300372250000007506129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100243003722500032407262954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
100243003722400030907262954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000906402162229630010000103003830038300383003830038
1002430037235000270612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
10024300372250003570612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
10024300372250003930612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402163229630010000103003830038300383003830038
1002430037225000390612954825100101010000101000050427810930018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
10024300372250003300612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
10024300372250003600612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
10024300372250007710612954825100101010007101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
100243003722500090612954825100101010000101000050427731330018300373003728287328786100102010000203000030037300371110021109101010000100000006402242229630010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  umlal2 v0.2d, v1.4s, v0.s[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a7 | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102043003723336061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101161129634100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101161129634100001003003830038300383003830038
10204300372256061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101161129634100001003003830038300383003830038
10204300372240061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101161129634100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101161129634100001003003830038300853003830038
10204300372253061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101161129634100001003003830038300383003830038
1020430037225444061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101161129634100001003003830038300383003830038
102043003722512061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101161129634100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000030007101161129634100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101161129634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372251086129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
100243003722566129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225366129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100028666402162229630010000103008530038300383003830038
100243003722596129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  umlal2 v0.2d, v8.4s, v9.s[1]
  movi v1.16b, 0
  umlal2 v1.2d, v8.4s, v9.s[1]
  movi v2.16b, 0
  umlal2 v2.2d, v8.4s, v9.s[1]
  movi v3.16b, 0
  umlal2 v3.2d, v8.4s, v9.s[1]
  movi v4.16b, 0
  umlal2 v4.2d, v8.4s, v9.s[1]
  movi v5.16b, 0
  umlal2 v5.2d, v8.4s, v9.s[1]
  movi v6.16b, 0
  umlal2 v6.2d, v8.4s, v9.s[1]
  movi v7.16b, 0
  umlal2 v7.2d, v8.4s, v9.s[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | a9 | ac | c2 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020420078151000032425801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000000101120216122006101600001002006520065200652006520065
16020420064150002403925801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000000101110116122006101600001002006520065200652006520065
1602042006415000003925801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000000101110116122006101600001002006520065200652006520065
1602042006415000003925801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000000101110216122006101600001002006520065200652006520065
1602042006415040003925801001008000010080000500640000020045200642006432280100200801052002409422006420064111602011009910010016000010000000101110116012006101600001002006520065200652006520065
16020420064150001503925801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000000101110116112006101600001002006520065200652006520065
1602042006415000003925801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000000101120216212006101600001002006520065200652006520065
1602042006415000462039258010010080000100800005006400000200452006420064402280100200800002002400002006420064111602011009910010016000010000000101110116122006101600001002006520065200652006520065
1602042006415100903925801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000000101110116242006101600001002006520065200652006520065
1602042006415000003925801001008000010080000500640000020045200642006432280100200800002002403092006420064111602011009910010016000010000000101130216222006101600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2507

retire (01) | cycle (02) | 03 | 04 | 18 | 1e | 1f | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024200591500018903732580010108000010800005064000011200342005320057322800102080000202400002005320057111600211091010160000100010045311182021119192005015160000102005420054200542005420054
16002420053151101203732580010108000010800005064000011200342005320053322800102080000202400002005320053111600211091010160000100010045311192021120202005015160000102005420054200542005420054
160024200531500033037325800101080000108000050640000112003420053200533228001020800002024000020053200531116002110910101600001002710043311202021118202005015160000102005420058200542005420054
16002420053150000037652580010108000010800005064000001200382005720053322800102080000202400002005320057111600211091010160000100010042622182422217192005030160000102005420054200582005820058
1600242005315010303732580010108000010800005064000001200382005720057322800102080000202400002005720057111600211091010160000101310036312162021120172005015160000102005420054200542005420054
1600242005315010003732580010108000010800005064000011200342005720053322800102080000202400002005320053111600211091010160000101310045612202442219172005430160000102005820058200582005820058
16002420053150103603562580010108000010800005064000001200382005720053322800102080000202400002005720057111600211091010160000100010047321172042220172005430160000102005820054200582005820058
16002420057150103903792580010108000010800005064000001200382005720057322800102080000202400002005720057111600211091010160000100010049622182442219182005430160000102005820058200582005820058
160024200571501081035482580010108000010800005064000001200382005720057322800102080000202400002013820057111600211091010160000100010050622202442221212005430160000102005820058200582005820058
1600242005715010003792580010108000010800005064000001200382005720057322800102080000202400002005720057111600211091010160000100010044311202021120202005015160000102005420054200542005420054

Test 6: throughput

Count: 12

Code:

  umlal2 v0.2d, v12.4s, v13.s[1]
  umlal2 v1.2d, v12.4s, v13.s[1]
  umlal2 v2.2d, v12.4s, v13.s[1]
  umlal2 v3.2d, v12.4s, v13.s[1]
  umlal2 v4.2d, v12.4s, v13.s[1]
  umlal2 v5.2d, v12.4s, v13.s[1]
  umlal2 v6.2d, v12.4s, v13.s[1]
  umlal2 v7.2d, v12.4s, v13.s[1]
  umlal2 v8.2d, v12.4s, v13.s[1]
  umlal2 v9.2d, v12.4s, v13.s[1]
  umlal2 v10.2d, v12.4s, v13.s[1]
  umlal2 v11.2d, v12.4s, v13.s[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 37 | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
120204301322300000601806102512010010012000010012000050096000013002030039317481665331499712010020012000020036000030040300391112020110099100100120000100000000007643116113003601200001003174930040317493004031749
120204317482250000001061671325120118100120018100120000500439952403002031748300391497331499712010020012000020036000030039317481112020110099100100120000100000000007610116113174501200001003174930040317493004031749
1202043174822500000000726025120100100120000100120000500439952413002030039317481665331499812010020012000020036000030040300391112020110099100100120000100000000007610116113003601200001003174930040317493004031749
120204317482250000001804102512011810012001810012000050096000013002030039300391665331670612010020012000020036000031748300391112020110099100100120000100000000007610116113003601200001003174930040317493004031749
12020431748225000000004167132512011810012001810012000050096000013090330039317481665331670612010020012000020036000030039317481112020110099100100120000100000000007610116113003601200001003174930040300433004030041
1202043004022500001201806102512010010012001810012000050096000013002030039317481665331502412010020012000020036000030039300401112020110099100100120000100000000007610116113174501200001003004030040300403174930923
120204300422250000132018061025120100100120000100120313500273990413002030039300421497331670612010020012000020036000030039317481112020110099100100120000100000000007610116113003701200001003004130040311873004030040
12020430040225000015880041025120100100120000100120000500439952413002031748300391497331499712010020012000020036000031748300391112020110099100100120000100000000007610116113174501200001003004031749300403174930040
120204300392250000000041671325120101100120018100120000500439952413002030039309221497331499712010020012000020036000031748300391112020110099100100120000100000000007610116113003601200001003174930040300413004031749
120204300402250000000041025120100100120000100120000500439952403002030040300391497331499712010020012000020036000031748300391112020110099100100120000100000000007610116113174501200001003004031749300403174930040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 37 | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | ac | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | df | e0 | ea | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1200243004022540480046025120010101202151212052250960000013002030039300391499631501912001020120000203600003003930039111200211091010120000100752462114162111670300361510120000103004030040300403004030040
1200243003922500600460251200101012000010120000509600001130020300393003914996315019120010201200002036000030039300391112002110910101200001007522311716211817030036155120000103004030040300403004030040
12002430039225000170460251200101012000010120000509600000130020300393003914996315019120010201200002036000030039300391112002110910101200001007522311616211717030036155120000103004030040300403004030040
120024300392250000046025120010101200001012000050960000113002030039300391499631501912001020120000203600003003930039111200211091010120000100752231151621116132300364653120000103004030040300403004030040
12002430039225000004602512001010120000101200005096000011309033003930039149963150191200102012000020360000300393003911120021109101012000010075229111616211165030036155120000103004030040300403004030040
1200243003922500000460251200101012000010120000509600001130020300393003914996315019120010201200002036000030039300391112002110910101200001007522311716211516030036155120000103004030040300403004030040
12002430039225000005202512001010120000101200005096000001300203003930039149963150191200102012000020360000300393003911120021109101012000010075223111616211516030036305120000103004030040300403004030040
120024300392240000052025120010101200001012000050960000013002030039300391499631501912001020120000203600003003930039111200211091010120000100752231116162111616030036155120000103004030040300403004030040
120024309222250000146025120010101200001012000050960000113002030039300391499631501912001020120000203600003003930039111200211091010120000100752462216164225160300363010120000103004030040300403004030040
12002430039225000004602512001010120000101200005096000011300203003930039149963150191200102012000020360000300393003911120021109101012000010075223111616211167030036155120000103004030040300403004030040