Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

UMLSL (vector, 4S)

Test 1: uops

Code:

  umlsl v0.4s, v1.4h, v2.4h
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)031e3f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
1004303722061254825100010001000398313130183037303724153289510001000300030373037111001100006373116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
1004303723061254825100010081000398313130183037303724153289510001000300030373037111001100006373116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  umlsl v0.4s, v1.4h, v2.4h
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)03080b18191e1f3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a8a9acc2cfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1020430037224000000066295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296680100001003003830038300383003830038
10204300372250000120064295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
1020430037225000000061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
10204300372250000000271295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
1020430037224000000061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
1020430037225000000061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
1020430037225000000061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
10204300372251000000612954825101001001000010010000500427731303001830037300372826532879210100200100002003000030037300371110201100991001001000010000000150710031633296340100001003003830038300383003830038
102043003722500000001374295482510100100100001001000050042773130300183003730037282653287451010020010164200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
1020430037225000000061295482510100100100321001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000700710031633296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)03080b191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a8a9acbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024300372250000822954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640516222963010000103003830038300383003830038
1002430037225000010922954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
100243003722500009822954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003721100211091010100001000000640216222963010000103003830038300383003830038
1002430037225000011872954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
100243003722500003252954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640316222963010000103003830038300383003830038
1002430037224000010502954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
1002430037225000010192954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
1002430037225000011012954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
1002430037225000010832954825100101010000101000050427731313006503003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  umlsl v0.4s, v0.4h, v1.4h
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)031e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a8accdcfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
10204300372250912295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037224061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300852826532874510100200100002003000030037300371110201100991001001000010001007101161129634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)03080b1e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)6061696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc5cfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024300372240009492954825100101010000101000050427731316300183003730037282873287671001020101672030000300373003711100211091010100001000064064162229630010000103003830038300383003830038
1002430037226000612954825100101010000101000050427731315300183003730037282873287671001020100002030000300373003711100211091010100001000064052162229630010000103003830038300383003830038
10024300372250002512954825100101010000101000050427731315300183003730037282873287671001020100002030000300373003711100211091010100001000064022162229630010000103003830038300383003830038
1002430037224000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001000064022162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001000064022162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001003064022162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001000064022162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001000064022162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001003064022162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001000064022162229630010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  umlsl v0.4s, v1.4h, v0.4h
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)0318191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a6a9acc2cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500008929548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161029634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)03040f1e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a8accfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1002430037225110612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037224000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300841110021109101010000100006402162229630010000103003830038300383003830038
1002430037224000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  umlsl v0.4s, v8.4h, v9.4h
  movi v1.16b, 0
  umlsl v1.4s, v8.4h, v9.4h
  movi v2.16b, 0
  umlsl v2.4s, v8.4h, v9.4h
  movi v3.16b, 0
  umlsl v3.4s, v8.4h, v9.4h
  movi v4.16b, 0
  umlsl v4.4s, v8.4h, v9.4h
  movi v5.16b, 0
  umlsl v5.4s, v8.4h, v9.4h
  movi v6.16b, 0
  umlsl v6.4s, v8.4h, v9.4h
  movi v7.16b, 0
  umlsl v7.4s, v8.4h, v9.4h
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01)cycle (02)03191e1f3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160204200891500003925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065
160204200641500008125801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010044001011111611200611600001002023420065200652006520065
1602042006415011711763925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010000001011111641200611600001002006520065200652006520065
160204200641510003925801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065
160204200641500003925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065
160204200641500008125801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065
160204200641500003925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065
16020420064150041703925801001008000010080000500640000120045200642035332280100200800002002400002006420064111602011009910010016000010000031011111611200611600001002006520065200652006520065
160204200641500003925801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065
16020420064150012018825801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2507

retire (01)cycle (02)03041e3a3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f60696d6e74scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8branch mispredict (cb)cfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0ea? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160024200581501036625800101080000108000050640000112003420053200530322800102080000202400002005320053111600211091010160000100010041311312021124262005015160000102005420054200542005420054
160024200531501036625800101080000108000050640000112003420053200530322800102080000202400002005320053111600211091010160000100010052311212021126222005015160000102005420054200542005420054
160024200531501037225800101080000108000050640000012003420053200530322800102080000202400002005320053111600211091010160000100010051312232041122252005015160000102005820054200582005420054
1600242005315010315225800101080000108000050640000112003420053200530322800102080000202400002005720057111600211091010160000100010049311252021126252005015160000102005420054200542005420054
160024200531501036625800101080000108000050640000112003420053200530322800102080000202400002005320053111600211091010160000100010043311242041123242005015160000102005420054200542005420054
1600242005315010325425800101080000108000050640000112003420053200530322800102080000202400002005320053111600211091010160000100010048321242021125212005015160000102005420054200542005420054
160024200531501036625800101080000108000050640000112003420053200530322800102080000202400002005320053111600211091010160000100010053311252021120242005015160000102005420054200542005420054
160024200531501036625800101080000108000050640000012003420053200530322800102080000202400002005320053111600211091010160000100010053311262021125242005015160000102005420054200542005420054
160024200531501036625800101080000108000050640000112003420053200535322800102080000202400002005320057111600211091010160000100010051311262041125242005015160000102005420054200542005420054
1600242005315010381125800101080000108000050640000112003420053200530322800102080000202400002005320053111600211091010160000100010055311272021122232005015160000102005420054200542005420054

Test 6: throughput

Count: 16

Code:

  umlsl v0.4s, v16.4h, v17.4h
  umlsl v1.4s, v16.4h, v17.4h
  umlsl v2.4s, v16.4h, v17.4h
  umlsl v3.4s, v16.4h, v17.4h
  umlsl v4.4s, v16.4h, v17.4h
  umlsl v5.4s, v16.4h, v17.4h
  umlsl v6.4s, v16.4h, v17.4h
  umlsl v7.4s, v16.4h, v17.4h
  umlsl v8.4s, v16.4h, v17.4h
  umlsl v9.4s, v16.4h, v17.4h
  umlsl v10.4s, v16.4h, v17.4h
  umlsl v11.4s, v16.4h, v17.4h
  umlsl v12.4s, v16.4h, v17.4h
  umlsl v13.4s, v16.4h, v17.4h
  umlsl v14.4s, v16.4h, v17.4h
  umlsl v15.4s, v16.4h, v17.4h
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire (01)cycle (02)0318191e373f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1602044006030000304125160100100160000100160000500239899904002140048400481997331999716010020016000020048000040048400392116020110099100100160000100001011021611400361600001004004940040400494004040040
1602044003930000005025160100100160017100160000500128000014003340039400491997332000616010020016000020048000040039400481116020110099100100160000100001011011611400361600001004004940049400404004940049
1602044003930000004125160100100160187122160000500239899904002940048400481997331999716010020016000020048000040048400391116020110099100100160000100001011011611400361600001004004940040400494004040049
16020440048300000174125160117100160017100160000500128000004002440048400391997331999716010020016000020048000040048400391116020110099100100160000100001011011611400361600001004004040049400404005040040
1602044003930000005025160117100160017100160000500128000004002940048400391997332000616010020016000020048000040039400481116020110099100100160000100001011011611400451600001004005040040400494004040049
16020440048300000175025160117100160017100160000500128000004002040039400481997331999716010020016000020048000040048400391116020110099100100160000100001011011711400451600001004004040049400404004940040
160204400393000001770625160100100160000100160000500128000004002140039400391997331999716010020016000020048000040039400481116020110099100100160000100001011011611400451600001004004040049400404004940040
16020440048300000174125160117100160017100160000500128000004002940048400391997332000616010020016000020048000040039400481116020110099100100160000100001011011611400451600001004004040049400404004040049
1602044003930000004125160117100160000100160000500128000004002940048400481997332002416010020016000020048000040039400391116020110099100100160000100001011011611400451600001004004940040400504004940040
1602044003930000004125160100100160000100160000500239899904002940048400391997331999716010020016000020048000040039400481116020110099100100160000100001011011611400451600001004004040040400404004940049

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2502

retire (01)cycle (02)0318191e1f373a3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0eaeb? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16002440049299000000462516001010160000101600005012800001110400204003940039199963200281600102016000020480000400394003911160021109101016000010023410024168229163222628400364113160000104004040040400504004040040
160024400483000000005825160010101600001016000050128000001104002040039400391999632001916001020160000204800004003940039111600211091010160000100010026168225163222627400364113160000104004040040400404004040040
160024400393000000015825160010101600181016000050128000001104003040039400391999632001916001020160000204800004003940039111600211091010160000100010026168225163221626400364113160000104004040040400404004040040
160024400483000000015825160010101600001016000050243886501104002040039400391999632001916001020160000204800004004940049111600211091010160000100010026168225163222127400364113160000104004040040400404004040040
160024400393000000015825160010101600001016000050128000001104002040039400391999632002016001020160000204800004003940039111600211091010160000100010024169228163222725400364113160000104004040040400404004040040
160024400393000000015825160028101600001016000050243886501104002040039400391999632001916001020160000204800004003940039111600211091010160000100010026168229163221728400364113160000104005040040400404004040040
160024400392990000005825160010101600001016000050128000001104002040039400391999632001916001020160000204800004003940039111600211091010160000100010026168226163222727400364113160000104004040040400404004040040
1600244004930000001815225160010101600001016000050128000001104002040048400481999632002916001020160000204800004003940039111600211091010160000100010026168229163222420400364113160000104004040050400404004040040
160024400393000000015825160010101600001016000050128000001104003040039400391999632001916001020160000204800004003940039111600211091010160000100010026168227163221427400364113160000104004040040400404004040040
160024400483000000005225160010101600001016000050128000001104002040039400391999632001916001020160000204800004003940039111600211091010160000100010024169226163222914400454113160000104004040040400404004040040