Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

MLA (by element, 2S)

Test 1: uops

Code:

  mla v0.2s, v1.2s, v2.s[1]

Register setup (outside the measured code; not included in the counts below):

  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a1 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
10043037250061254825100010001000398313030183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
10043037240961254825100010001000398313130183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
10043037240061254825100010001000398313130183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
10043037240061254825100010001000398313130183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
10043037250084254825100010001000398313030183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
10043037240061254825100010001000398313130183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
10043037240061254825100010001000398313030183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
10043037250061254825100010001000398313030183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
10043037240061254825100010001000398313130183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
10043037240061254825100010001000398313030183037303724153289510001000300030373037111001100000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  mla v0.2s, v1.2s, v2.s[1]

Register setup (outside the measured code; not included in the counts below):

  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 09 | 0b | 18 | 19 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | a9 | ac | c2 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020430037224000000612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710021623296340100001003003830038300383003830038
102043003722500003000612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710121622296340100001003003830038300383003830038
1020430037225000000612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710121622296340100001003003830038300383003830038
102043003722500004740612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710121622296340100001003003830038300383003830038
102043003722500004800612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710121622296340100001003003830038300383003830038
1020430037224000042905992954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710121622296340100001003003830038300383003830038
1020430037225000000612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710121622296340100001003003830038300383003830038
102043003722400004950612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100000000712122522296340100001003003830038300383003830038
102043003722500005130612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100000030710121622296340100001003003830038300383003830038
102043003722500004710612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710121622296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100243003722500096061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000006402162229630010000103003830038300383003830038
1002430037225000525061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000006402162229630010000103003830038300383003830038
1002430037225000120170295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000006402162229630010000103003830038300383003830038
1002430037225000444061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000006402162329630010000103003830038300383003830038
10024300372250002850726295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000006402162229630010000103003830038300383003830038
1002430037225000447061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000006403162229630010000103003830038300383003830038
1002430037225000456061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000006402163229630010000103003830038300383003830038
1002430037225000411061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000006403163229630010000103003830038300383003830038
10024300372240000061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000006402162229630010000103003830038300383003830038
10024300372250003061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000006402162229630010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  mla v0.2s, v0.2s, v1.s[1]

Register setup (outside the measured code; not included in the counts below):

  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102043003722500003600612954825101001001000010010000500427731303001803003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
10204300372250000000612954825101001001000010010000500427731303001803003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
10204300372250000600612954825101001001000010010000500427731303001803003730037282653287451026220010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722500003900612954825101001001000010010000500427731303001803003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722500001800612954825101001001000010010000500427731303001803003730037282653287451010020010000204300003003730037111020110099100100100001000000000071011621296340100001003003830038300383003830038
102043003722400005400612954825101001001000010010000500427731303001803003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
10204300372250000600612954825101001001000010010000500427731303001803003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
10204300372250000600612954825101001001000010010000500427731303001803003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722600006007262954825101001001000010010000500427731303001803003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722500003005362954825101001001000010010000500427731313001803003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | ac | c3 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372250180822954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037225018612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100003006402162229630010000103003830038300383003830038
1002430037225036612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
10024300372250468612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037225015822954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630110000103003830038300383003830038
1002430037225012612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037225033612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
100243003722506612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383008430038
1002430037225066612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037225015612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000104020006402162229630010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  mla v0.2s, v1.2s, v0.s[1]

Register setup (outside the measured code; not included in the counts below):

  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | branch mispredict (cb) | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102043003722500612954825101001001000010010000500427731302300183003730037282653287451010020010000200300003003730037111020110099100100100001000960710221622296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731312300183003730037282653287451010020010000200300003003730084111020110099100100100001000840710221622296340100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773131030018300373003728265328745101002001000020030000300373003711102011009910010010000100400710221622296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731312300183003730037282653287451010020010000200300003003730037111020110099100100100001000270710221622296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731310300183003730037282653287451010020010000200300003003730037111020110099100100100001000360710221622296340100001003003830038300383003830038
1020430037224007132954825101001001000010010000500427731312300183003730037282653287451010020010000200300003003730037111020110099100100100001000150710221622296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731302300183003730037282653287451010020010000200300003003730037111020110099100100100001000870710221622296340100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773131230018300373003728265328745101002001000020030000300373003711102011009910010010000100000710221622296340100001003003830038300383008530038
10204300372250105612954825101001001000010010000500427731312300183003730037282653287451010020010000200300003003730037111020110099100100100001000360710021622296340100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773130230018300373003728265328762101002001000020030000300373003711102011009910010010000100000710221622296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a8 | ac | c2 | cf | d0 | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372250010529548251001010100001010000504277313003001803003730037282873287671001020100002030000300373003711100211091010100001000060640003162229630010000103003830038300383003830038
10024300372250061295482510010101000010100005042773130030018030037300372828732876710010201000020300003003730037111002110910101000010000930640002162229630010000103003830038300383003830038
10024300372250061295482510010101000010100005042773130030018030037300372828732876710010201000020300003003730037111002110910101000010000690640002162229630010000103003830038300383003830038
10024300372250061295482510010101000010100005042773131030018030037300372828732876710010201000020300003003730037111002110910101000010000990640002162229630010000103003830038300383003830038
10024300372240061295482510010101000010100005042773131030018030037300372828732876710010201000020300003003730037111002110910101000010000900640002162229630010000103003830038300383003830038
100243003722500232295482510010101000010100005042773131030018030037300372828732876710010201000020300003003730037111002110910101000010000810640002162229630010000103003830038300383003830038
1002430037224006129548251001010100001010000504277313103001803003730037282873287671001020100002030000300373003711100211091010100001000030640002162229630010000103003830038300383003830038
10024300372240061295482510010101000010100005042773131030018030037300372828732876710010201000020300003003730037111002110910101000010000960640002162229630010000103003830038300383003830038
10024300372250061295482510010101000010100005042773131030018030060300372828732876710010201000020300003003730037111002110910101000010000870640002162229630010000103003830038300383003830038
10024300372250061295482510010101000010100005042773131030018030037300372828732876710010201000020300003003730037111002110910101000010000900640002162229630010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  mla v0.2s, v8.2s, v9.s[1]
  movi v1.16b, 0
  mla v1.2s, v8.2s, v9.s[1]
  movi v2.16b, 0
  mla v2.2s, v8.2s, v9.s[1]
  movi v3.16b, 0
  mla v3.2s, v8.2s, v9.s[1]
  movi v4.16b, 0
  mla v4.2s, v8.2s, v9.s[1]
  movi v5.16b, 0
  mla v5.2s, v8.2s, v9.s[1]
  movi v6.16b, 0
  mla v6.2s, v8.2s, v9.s[1]
  movi v7.16b, 0
  mla v7.2s, v8.2s, v9.s[1]

Register setup (outside the measured code; not included in the counts below):

  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602042009115000000622580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000001011111611200611600001002006520065200652006520065
160204200641500010264392580100100800001008000050064000002004520064200643228053820080000200240000200642006411160201100991001001600001000001011111611200611600001002006520065200652006520065
1602042006415100000812580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000001011111611200611600001002006520065200652006520065
1602042006415110000392580100100800001008000050064000002004520064200643228033020080000200240000200642006411160201100991001001600001000001011111611200611600001002006520065200652006520065
16020420064150000120392580100100800001008000050064000002004520305200643228010020080000200240000200642006411160201100991001001600001000201011111611200611600001002006520065200652006520065
16020420064150000003925801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000121011111611200611600001002006520065200652006520065
16020420064150001120392580100100800001008000050064000002004520064200643228022920080000200240000200642006411160201100991001001600001009001011111611200611600001002006520065200652006520065
1602042006415000000392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000031011111611200611600001002006520065200652006520065
1602042006415000000392580100123800001008000050064000002004520064200643228010020080417200240000200642006411160201100991001001600001000001011111611200611600001002006520065200652006520146
1602042006415000000392580100100800001188000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000001011111611200611600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 18 | 1e | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | a9 | ac | c5 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024200871500014510680012128000012800006264000010102003220051200513228001220800002024000020051200511116002110910101600001010001004034115252111618200482201160000102005220052200522005220052
1600242005115002104580800121280000128000062640000111020032200512005132280012208000020240000200512005111160021109101016000010606010039136115252111518200482201160000102005220052200522005220052
160024200511501004564800121280000128000062640000100200322005120051322800122080000202400002005120051111600211091010160000100033010037136118832111614200482201160000102005220052200522005220052
16002420051150000456980012128000012800006264000011020032200512005132280012208000020240000200512005111160021109101016000010700010041134114252111516200482201160000102005220052200522005220052
1600242005115000045578001212800001280000626400001010200322005120051322800122080000202400002005120051111600211091010160000104103010041134116252111617200482201160000102005220052200522005220052
16002420051150000455180012128000012800006264000010020032200512005132280012208000020240000200512005111160021109101016000010370102010041134118252111618200482201160000102005220052200522005220052
1600242005115000045578001212800001280000626400001010200322005120051322800122080000202400002005120051111600211091010160000103100010041135118252111617200482201160000102005220052200522005220052
160024200511500004563800121280000128000062640000111020032200512005132280012208000020240000200512005111160021109101016000010000010039135116252111816200482201160000102005220052200522005220052
1600242005115000045508001212800001280000626400001010200322005120051322800122080000202400002005120051111600211091010160000105103010040131115252111816200482201160000102005220052200522005220052
1600242005115009066828001212800001280000626400001102003220051200513228001220800002024000020051200511116002110910101600001000108010037136116252111619200482201160000102005220052200522005220052

Test 6: throughput

Count: 12

Code:

  mla v0.2s, v12.2s, v13.s[1]
  mla v1.2s, v12.2s, v13.s[1]
  mla v2.2s, v12.2s, v13.s[1]
  mla v3.2s, v12.2s, v13.s[1]
  mla v4.2s, v12.2s, v13.s[1]
  mla v5.2s, v12.2s, v13.s[1]
  mla v6.2s, v12.2s, v13.s[1]
  mla v7.2s, v12.2s, v13.s[1]
  mla v8.2s, v12.2s, v13.s[1]
  mla v9.2s, v12.2s, v13.s[1]
  mla v10.2s, v12.2s, v13.s[1]
  mla v11.2s, v12.2s, v13.s[1]

Register setup (outside the measured code; not included in the counts below):

  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0a | 0b | 1e | 37 | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
120204302272341011000251025120117100120000100120000500990000300203003930039149733167321201002001200002003600003003930039111202011009910010012000010003100761412161111300361200001003004030040300403004030040
12020430039225100100025102512010010012000010012000050096000030020300413003914973314997120100200120000200360000300393003911120201100991001001200001000300676148161010300361200001003004030040300403004030040
120204300392251001000264025120100100120000100120000500960000300203003930089149733149971201002001200002003600003004030042111202011009910010012000010003302053761411161110300361200001003004030040300403004030043
1202043003922510010186025102512011710012000010012000050096000030020300393003914973314997120100200120000200360000300393003911120201100991001001200001000303761410161011300361200001003004030040300403175430923
12020430039224100100025102512010010012000010012000050096000030020300393175014973314997120100200120000200360000300393003911120201100991001001200001000100761410161011300361200001003004030040300413004030040
1202043003922510010079251025120100100120001100120000500960000300203003930039149733149971201002001200002003600003003930039111202011009910010012000010003703761411161010300361200001003004030040300413004230040
12020430039225100100025102512010010012000010012000050096000030020300393003914973314997120100200120000200360000300393003911120201100991001001200001000100761411161011300361200001003004030040300403004030040
120204300392241001000251025120100100120000100120000500960000300203003930039158283149971201002001200002003600003003930039111202011009910010012000010001063761411161111300361200001003004030040300403004030040
120204300392251001000215102512010010012001710012000050096000030020300393003914973314997120100200120000200360000300393003911120201100991001001200001000000761410161011300361200001003004030040300403004030040
1202043003922510010002510251201171001200001001200005009600003002030039300391497331500012010020012000020036000030039300391112020110099100100120000100010076148161111300371200001003004030040300413004030040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 08 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a8 | ac | c5 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
12002430040225004625120010101200001012000050960000113002030039300391499631501912001020120000203600003003930039111200211091010120000100000752462229164115730036206120000103004030040300403004030040
1200243003922500522512001010120017101200005096000011300203003930039149963150191200102012000020360000300393003911120021109101012000010000075246221416422148300364012120000103004030040300403004030040
12002430039225005225120010101200001012000050960000113002030039300391499631501912001020120000203600003003930039111200211091010120000100000752231191621181430036206120000103004030040300403004030040
120024300392250046251200101012000010120000509600000130020300393003914996315019120010201200002036000030039300391112002110910101200001000007522311131621151330036206120000103004030040300403004030040
12002430039224104625120010101200001012000050960000013002030039300391667031501912001020120000203600003003930039111200211091010120000100000752431171621166300362012120000103004030040300403004030040
12002430039224004625120010101200001012000050960000013002030039309221499631501912001020120000203600003003930039111200211091010120000100000752232181621111430036206120000103004030040300403004030040
120024300392250275225120010101200001012000050960000013002030039300391499631501912001020120000203600003003930039111200211091010120000100001752231151621156300362013120000103004030040300403004030040
12002430922225004625120010101200001012000050960000113002030039300391499631501912001020120000203600003003930039111200211091010120000100000752231171621161330036206120000103004030040300403004030040
1200243003922500462512001010120000101200005096000011300203003930039149963150191200102012000020360000300393003911120021109101012000010000075223115162117730036206120000103004030040300403004030040
1200243003922500462512001010120000101200005096000011300203003930039149963150191200102012000020360000300393003911120021109101012000010000075223115162116630036206120000103004030040300403004030040