Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

UMLAL (vector, 2D)

Test 1: uops

Code:

  umlal v0.2d, v1.2s, v2.2s
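
Initialization (not part of the measured code; only the instruction above is counted, see "Retires: 1.000"):

  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3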

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073216112630100030383038303830383038
100430372308225482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831303018303730372415328951000100030003037303711100110001073116112630100030383038303830383038
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372296125482510001000100039831313018303730372415328951000116830003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372366125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  umlal v0.2d, v1.2s, v2.2s

Initialization (not part of the measured, unrolled code):

  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c2 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102043003722500000010329548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000710121623296340100001003003830038300383003830038
102043003722500000014729548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000710121623296340100001003003830038300383003830038
102043003722500000044029548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000710121622296340100001003003830038300383003830038
10204300372250000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000710121623296340100001003003830038300383003830038
10204300372250000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000710121623296340100001003003830038300383003830038
10204300372250000016129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000710121623296340100001003003830038300383003830038
10204300372250000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000710121623296340100001003003830038300383003830038
10204300372250000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100030710121622296340100001003003830038300383003830038
102043003722500000034629548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000710121622296340100001003003830038300383003830038
10204300372240000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100002754123222297063100001003013230133301343013430085

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037225000000027829548251001010100001010000504277313130018300373003728287328767100102010162203000030037300371110021109101010000100000000006402162229630010000103003830038300383003830038
100243003722500000006129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000000006402162229630010000103003830038300383003830038
1002430037224000000012429548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000000006402162229630010000103003830038300383003830038
10024300372240000120041929548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000000006402162229630010000103003830038300383003830038
100243003722500000007129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000000006402162229630010000103003830038300383003830038
1002430037225000000019129548251001010100001010000504277313130018300373003728287728767100102010000203000030037300371110021109101010000100000000006402163229630010000103003830038300383003830038
100243003722500000006129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000000006402162229630010000103003830038300383003830038
1002430037225000000012429548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000000006402162229630010000103003830038300383003830038
1002430037225000000042329548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000000006402162229630010000103003830038300383003830038
100243003722500000006129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000030006402162229630010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  umlal v0.2d, v0.2s, v1.2s

Initialization (not part of the measured, unrolled code):

  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a8 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102043003722502332954844101001001000010010000500427731330018030037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
102043003722501242954825101001001000010010000500427731330018030037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250612954825101001001000010010000500427731330018030037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
102043003722501242954825101001001000010010000500427731330018030037300372826532874510100200100002003000030037300371110201100991001001000010040071011611296340100001003003830038300383003830038
10204300372250612954825101001001000010010000500427731330018030037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250612954825101001001000010010000500427731330018030037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250612954825101001001000010010000500427731330018030037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250612954825101001001000010010000500427731330018030037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250612954825101001001000010010000500427731330018030037300372826532874510100200100002003000030037300371110201100991001001000010000171011611296345100001003003830038300383003830038
10204300372250612954825101001001000010010000627427731330018030084300372826532874510100204100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 18 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372250003092954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000006402162329630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000006402162329630010000103003830038300383003830038
10024300372250003992954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010200006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000006402162329630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000006402162229630010000103003830038300383003830038
10024300372250007262954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000006402162329630010000103003830038300383003830038
100243003722501201702954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000006402162329630010000103003830038300383003830038
10024300372250013802954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010001006402242229738010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000006402162229630010000103003830038300383003830038
1002430037225100612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000006402162229630010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  umlal v0.2d, v1.2s, v0.2s

Initialization (not part of the measured, unrolled code):

  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a9 | ac | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204300372250061295482510100100100001001000050042773130300183003730037282653287451010020010167200300003003730037111020110099100100100001000003071011611296340100001003003830086300383003830038
102043003722500612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000710116112963428100001003003830038300383003830038
102043003722506961295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071011611296340100001003003830038300383003830038
102043003722400145295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071011611296340100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071011611296340100001003003830038300383003830038
10204300372250061295482510100100100001001014950042773130300183003730037282653287451010020010182200300003003730037111020110099100100100001000000071011611296340100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071011611296340100001003003830038300383003830038
10204300372240061295482510100100100001001000050042773130300183003730037282653287451010020010000204300003003730037111020110099100100100001000000071011611296340100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000040071011611296340100001003003830038300383003830086
10204300372240061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a8 | a9 | ac | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006404164429630010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006404164429630010000103003830038300383003830038
100243003722506129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000101000006403163429630010000103003830038300383003830038
10024300372240251295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000024006404164429630010000103003830038300383003830038
1002430037225972629548251001010100001010000504277313030018300373003728287328767100102010000203050130037300371110021109101010000100000006404164429630010000103003830038300383003830038
1002430037225072629548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006404164429630010000103003830038300383003830038
100243003722406129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006403163429630010000103003830038300383003830038
100243003722506129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000006404164429630010000103003830038300383003830038
100243003722506129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000006403164429630010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006404164329630010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  umlal v0.2d, v8.2s, v9.2s
  movi v1.16b, 0
  umlal v1.2d, v8.2s, v9.2s
  movi v2.16b, 0
  umlal v2.2d, v8.2s, v9.2s
  movi v3.16b, 0
  umlal v3.2d, v8.2s, v9.2s
  movi v4.16b, 0
  umlal v4.2d, v8.2s, v9.2s
  movi v5.16b, 0
  umlal v5.2d, v8.2s, v9.2s
  movi v6.16b, 0
  umlal v6.2d, v8.2s, v9.2s
  movi v7.16b, 0
  umlal v7.2d, v8.2s, v9.2s
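
Initialization (not part of the measured, unrolled code; the 16 instructions above form the loop body):

  movi v8.16b, 9
  movi v9.16b, 10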

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 08 | 0b | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020420088150000392580100100800001008000050064000000200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
160204200641500021392580100100800001008000050064000000200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
160204200641500039622580100100800001008000050064000000200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
16020420064150000392580100100800001008000050064000001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
16020420064150000392580100100800001008000050064000001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
16020420064151000392580100100800001008000050064000000200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065201462006520065
16020420064151000392580100100800001008000050064000001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
16020420064150000392580100100800001008000050064000000200452006420064322801002008000020024000020145200641116020110099100100160000100001011111611200611600001002006520065200652006520065
160204200641500030392580100100800001008000050064000001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
16020420064150000392580100100800001008000050064000001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 18 | 1e | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024200941500004527800121280000128000062640000110200322005120051322800122080000202400002005120051111600211091010160000100001004081114252111111200482201160000102005220052200522005220052
160024200511510004527800121280000128000062640916100200322005120051322800122080000202400002005120051111600211091010160000100061110030361725211157200482201160000102005220052200522005220052
16002420051150000452780012128000012800006264000010020032200512005132280012208000020240000200512005111160021109101016000010000100301311725211108200482201160000102005220052200522005220052
16002420051150000452780012128000012800006264000010020032200512005132280012208000020240000200512005111160021109101016000010000100303111125211117200482201160000102005220052200522005220052
16002420051150000452780012128000012800006264000010020032200512005132280012208000020240000200512005111160021109101016000010000100303511125211138200482202160000102005220052200522005220052
1600242005115000045278001212800001280000626400001010200322005120051322800122080000202400002005120051111600211091010160000100001003436111252111511200483201160000102005220052200522005220052
16002420051151000452780012128000012800006264000011020032200512005132280012208000020240000200512014011160021109101016000010000100341361825211117200482201160000102005220052200522005220052
160024200511500004527800121280000128000062640000111020032200512005132280012208000020240000200512005111160021109101016000010000100361311725211117200482201160000102005220052200522005220052
1600242005115000045278001212800001280000626400001110200322005120051322800122080000202400002005120051111600211091010160000100001003013611225211157200482201160000102005220052200522005220052
160024200511500006362780012128000012800006264000011102003220051200513228001220800002024000020051200511116002110910101600001000010038136111252111511200482201160000102005220052200522005220052

Test 6: throughput

Count: 16

Code:

  umlal v0.2d, v16.2s, v17.2s
  umlal v1.2d, v16.2s, v17.2s
  umlal v2.2d, v16.2s, v17.2s
  umlal v3.2d, v16.2s, v17.2s
  umlal v4.2d, v16.2s, v17.2s
  umlal v5.2d, v16.2s, v17.2s
  umlal v6.2d, v16.2s, v17.2s
  umlal v7.2d, v16.2s, v17.2s
  umlal v8.2d, v16.2s, v17.2s
  umlal v9.2d, v16.2s, v17.2s
  umlal v10.2d, v16.2s, v17.2s
  umlal v11.2d, v16.2s, v17.2s
  umlal v12.2d, v16.2s, v17.2s
  umlal v13.2d, v16.2s, v17.2s
  umlal v14.2d, v16.2s, v17.2s
  umlal v15.2d, v16.2s, v17.2s
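
Initialization (not part of the measured, unrolled code; the 16 instructions above form the loop body):

  movi v16.16b, 17
  movi v17.16b, 18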

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire (01) | cycle (02) | 03 | 07 | 08 | 0b | 18 | 19 | 1e | 1f | 37 | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | ac | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602044004030000000900412516010110016000010016000050012800000400294003940040199733199981601002001600002004800004003940039111602011009910010016000010000000010110116114004501600001004004040040400404004040040
1602044003930000000300412516010010016000010016000050012800000400204003940039199733199971601002001600002004800004003940048111602011009910010016000010000000010110124114003601600001004004040040400494004040040
1602044003930000000000412516010010016000010016000050013199971400204004840048199733199971601002001600002004800004003940039111602011009910010016000010000000010110116114003601600001004004040040400904004940040
1602044003930000000000412516010010016000010016000050012800001400204003940048199733199971601002001600002004800004004840048111602011009910010016000010000000010110116114003601600001004004040049400404004040040
1602044003930000000000412516010010016000110016000050012800001400304003940040199733199971601002001600002004800004004040039111602011009910010016000010000000010110116114003601600001004004040040400404004040040
1602044004830000000000412516010010016000010016000050013199971400294003940039199733200071601002001600002004800004003940039111602011009910010016000010000000010110116114003601600001004004040040400404004040040
1602044003930000000000412516010010016000010016000050012800001400204003940039199733199971601002001600002004800004003940039111602011009910010016000010000000010110116114003601600001004004040040400494004040040
1602044003930000000000422516010010016000010016000050023990270400204004840039199733199971601002001600002004800004004840039111602011009910010016000010000000010110116114003601600001004004040040400404004040040
1602044004830000000000412516011710016000010016000050012800001400204003940039199733199971601002001600002004800004003940039111602011009910010016000010000000010110116114003601600001004004040040400404004040040
1602044004030000000000412516010010016000010016000050023989990400204004840039199733199971601002001600002004800004003940039111602011009910010016000010000000010110116114003601600001004004040040400404004940040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 1e | 1f | 37 | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600244004830030175525160010101600001016000050239899911400204003940039199963200191600102016000020480000400394003911160021109101016000010010022311181621117640036155160000104004040040400404004040040
16002440039300951004625160027101600171016000050239899911400204003940048199963200191600102016000020480000400394003911160021109101016000010010022311161621121740036155160000104004940040400494004040040
1600244004829935400472516001010160000101600005012800001140029400394003919996320028160010201600002048000040039400481116002110910101600001001002231161621161640045155160000104004040040400404004040040
1600244003930092700551161601951016001710160000501280000114002040039400481999632002816001020160000204800004004940039111600211091010160000100100223111616211161640036155160000104004040040400404004940040
1600244003930090901746251600101016000010160000501280000114002040039400391999632001916001020160000204800004003940048111600211091010160000100100223111616211161640036155160000104004040049400404004940040
16002440048300924005525160010101600001016000050128000011400294003940048199963200191600102016000020480000400484003911160021109101016000010010022311161621116640045155160000104004040049400404004940049
16002440039300000462516001010160000101600005012800001140029400394004820016320028160010201600002048000040039400391116002110910101600001001002231161621151640045155160000104004140049400494004040040
160024400393000006125160010101600001016000050239899901400294004840039199963200191600102016000020480000400394003911160021109101016000010010024622616422166400453010160000104004040040400494004040049
1600244004829971701752251600271016013210160000501280000014002040039400391999632001916001020160000204800004003940048111600211091010160000100100246221616422166400453010160000104004040049400404004940040
160024400393000005225160027101600171016000050128000001400204004840039199963200191600102016000020480000400394003911160021109101016000010010024622616422166400363010160000104004040049400404004940040