Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

UMLAL (by element, 4S)

Test 1: uops

Code:

  umlal v0.4s, v1.4h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 09 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a0 | a8 | a9 | ac | cf | d5 | d6 | da | db | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
1004303723000612548251000100010003983130301830373037241532895100010003000303730371110011000000247311600112630100030383038303830383038
100430372300061254825100010001000398313130183037303724153289510001000300030373037111001100000007311600112630100030383038303830383038
100430372300061254825100010001000398313130183037303724153289510001000300030373037111001100000067311600112630100030383038303830383038
1004303723000197254825100010001000398313130183037303724153289510001000300030373037111001100006007311600112630100030383038303830383038
100430372301961254825100010001000398313130183037303724153289510001000300030373037211001100004007311600112630100030383038303830383038
100430372300061254825100010001000398313030183037303724153289510001000300030373037111001100000007311600112630100030383038303830383038
100430372200061254825100010001000398313130183037303724153289510001000300030373037111001100000037311600112630100030383038303830383038
100430372300061254825100010001000398313030183037303724153289510001146300030373037111001100000037311600112630100030383038303830383038
100430372200061254825100010001000398313030183037303724153289510001000300030373037111001100000007311600112630100030383038303830383038
1004303722000612548251000100010003983130301830373037241532895100010003000303730371110011000021127311600112630100030383038303830383038

Test 2: Latency 1->1

Code:

  umlal v0.4s, v1.4h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10205300372250034429548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000710031622296340100001003003830038300383003830038
10204300372250016629548251010010010000106104475224277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000710021622296340100001003003830038300383003830038
10204300372240093329548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000710121622296340100001003003830038300383003830038
1020430037225006129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000710121622296340100001003003830038300383003830038
10204300372250034129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000710121622296340100001003003830038300383003830038
10204300372250035729548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000710121622296340100001003003830038300383003830038
102043003722530034029548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000710121622296340100001003003830038300383003830038
10204300372250076929548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000710121622296340100001003003830038300383003830083
10204300372250036129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000710121622296340100001003003830038300383003830038
10204300372250042429548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000710121622296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037225000000240295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402162229630010000103003830038300383003830038
10024300372250000904992295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402162229630010000103003830038300383003830038
1002430037224000000564295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402162229630010000103003830038300383003830038
1002430037225000000890295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402162229630010000103003830038300383003830038
10024300372250000001036295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402162229630010000103003830038300383003830038
1002430085224000000373295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402162229630010000103003830038300383003830038
10024300372250000001216295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402162329630010000103003830038300383003830038
10024300372250000001189295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402162229630010000103003830038300383003830038
10024300372250000001091295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402162229630010000103003830038300383003830038
10024300372240000001226295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000000006402162229630010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  umlal v0.4s, v0.4h, v1.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102043003722500612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000071011611296340100001003003830038300383003830038
102043003723200103295482510100100100001001000050042773130300183008530274282692328798107262161084421432475302303017961102011009910010010000100220102164080071011611296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000071014011296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000071011611296340100001003003830038300383003830038
102043003722400612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000071011611296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000071011611296340100001003003830038300383003830038
102043008422500612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000071011611296340100001003003830038300383003830038
102043003722400612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000071011611296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000071011611296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037225000085629548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
100243003722500006129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037224000090129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037225000014529548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037225000085829548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
100243003722400006129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
10024300372250000210529548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037225000140929548251001010100001010000504277313030018300373003728302328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037225000020829548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
100243003722500008429548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100010839106402162229630010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  umlal v0.4s, v1.4h, v0.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a7 | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020430037225005826129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007103161129634100001003003830038300383003830038
1020430037225004896129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500516129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
1020430037225005496129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
1020430037225005496129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
10204300372250006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
10204300372240006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
1020430037225005196129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
1020430037225004026129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
1020430037225004806129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037225061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037225061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037225661295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037225061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000640316222963010000103008630038300383003830038
1002430037225061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037225061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037225061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037225061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037225061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037225082295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038

Test 5: throughput (each destination zeroed with movi before its umlal)

Count: 8

Code:

  movi v0.16b, 0
  umlal v0.4s, v8.4h, v9.h[1]
  movi v1.16b, 0
  umlal v1.4s, v8.4h, v9.h[1]
  movi v2.16b, 0
  umlal v2.4s, v8.4h, v9.h[1]
  movi v3.16b, 0
  umlal v3.4s, v8.4h, v9.h[1]
  movi v4.16b, 0
  umlal v4.4s, v8.4h, v9.h[1]
  movi v5.16b, 0
  umlal v5.4s, v8.4h, v9.h[1]
  movi v6.16b, 0
  umlal v6.4s, v8.4h, v9.h[1]
  movi v7.16b, 0
  umlal v7.4s, v8.4h, v9.h[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | ac | cf | d0 | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602042006515112939258010010080000100800005006400000152004520064200643228010020080000200240000200642006411160201100991001001600001000001011150116112006101600001002006520065200652006520065
16020420064150039258010010080000100800005006400000152004520076200643228020520080000200240000200642006411160201100991001001600001000001011150116112006101600001002006520065200652006520065
160204200641502439258010010080000100800005006400000152004520064200643228010020080000200240000200642006411160201100991001001600001000001011150116112006101600001002006520065200652006520065
16020420064150039258010010080000100800005006400000152004520064200643228010020080000200240000200642006411160201100991001001600001000001011150116112006101600001002006520065200652006520065
16020420064150639258010010080000100800005006400000152004520064200643228010020080000200240000200642006411160201100991001001600001000001011151116112006101600001002006520065200652006520065
160204200641513339258010010080000100800005006400000152004520064200643228010020080000200240000200642006411160201100991001001600001000001011151116112006101600001002006520065200652006520065
160204200641501839258010010080000100800005006400000152004520064200643228010020080000200240000202252006411160201100991001001600001000001011150116112006101600001002006520065200652006520065
160204200641505139258010010080000100800005006400000152004520076200643228010020080000200240000200642006411160201100991001001600001000001011150116112006101600001002006520065200652006520065
160204200641502439258010010080000100800005006400000152004520064200643228010020080000200240000200642006411160201100991001001600001000001011150116112006101600001002006520065200652006520065
16020420064150044258010010080000100800005006400000152004520064200643228010020080000200240000200642006411160201100991001001600001000001011150116112006101600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 04 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a7 | a8 | a9 | ac | cd | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024201031511000369004525800121280000128000062640000115200272004620046322800122080000202400002004620046111600211091010160000100000001004638142204112222200432150160000102005120047200472004720047
1600242004615000000004525800121280000128000062640000110200272004620046322800122080000202400002004620046111600211091010160000100000001004131115204112215200432150160000102004720047200512004720047
1600242004615000001200452580012128000012800006264000011102002720046200503228001220800002024000020046200461116002110910101600001000000010049167122202111526200432300160000102004720047200472004720047
160024200501510000000452580012128000012800006264000011102002720046200463228001220800002024000020050200461116002110910101600001000000010039168115202112215200472150160000102004720047200472004720047
1600242004615000000007172580012128000012800006264000011102002720046200463228001220800002024000020046200461116002110910101600001000000010039138115202112214200432150160000102004720047200472004720047
160024200461500000000452580012128000012800006264000011102002720046200463228001220800002024000020046200461116002110910101600001000000010038138115202112216200432150160000102004720047200472004720047
16002420046150000018300452580012128000012800006264000011102002720046200463228001220800002024000020046200461116002110910101600001000000010041138116202112215200432150160000102004720047200472004720047
160024200461500000000452580012128000012800006264000011102002720046200463228001220800002024000020046200461116002110910101600001000000010045138116202112415200432150160000102004720047200472004720047
1600242004615000004800452580012128000012800006264000010102002720046200463228001220800002024000020046200461116002110910101600001000000010045138122202112622200432150160000102004720047200472004720047
160024200461500000000452580012128000012800006264000011102002720046200463228001220800002024000020046200461116002110910101600001000000010041138122202111625200432150160000102004720047200472004720047

Test 6: throughput (12 separate accumulator registers, no zeroing between umlals)

Count: 12

Code:

  umlal v0.4s, v12.4h, v13.h[1]
  umlal v1.4s, v12.4h, v13.h[1]
  umlal v2.4s, v12.4h, v13.h[1]
  umlal v3.4s, v12.4h, v13.h[1]
  umlal v4.4s, v12.4h, v13.h[1]
  umlal v5.4s, v12.4h, v13.h[1]
  umlal v6.4s, v12.4h, v13.h[1]
  umlal v7.4s, v12.4h, v13.h[1]
  umlal v8.4s, v12.4h, v13.h[1]
  umlal v9.4s, v12.4h, v13.h[1]
  umlal v10.4s, v12.4h, v13.h[1]
  umlal v11.4s, v12.4h, v13.h[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a7 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
12020430132225000000416713251201181001200011001200005009600001317293174830039149733149971201002001200002003600003174830039111202011009910010012000010000000007610216113003601200001003004030041300403004130040
120204300392380000004167132512011810012001810012000050042943631300203003931748166533149971201002001200002003600003174830039111202011009910010012000010000100007610116113003901200001003174930314300413126631749
120204306292251000004102512010110012001810012000050042943631300203003931748149733149981201002001200002003600003174830039111202011009910010012000010000000007610116113003601200001003004031749300403174930040
1202043003923800000186102512010010012000010012000050011398680300203003930039149733149971201002001200002003600003004131748111202011009910010012000010000000007610116113174501200001003004031749300403174930040
1202043003923800000186102512010010012000010012000050043940610317293174830039149733149971201002001200002003600003003931748111202011009910010012000010000010007610116113174501200001003004031749300403174930040
120204300392380000018820251201181001201001001200005009600000300203003931748166533167061201002001200002003600003004030039111202011009910010012000010000000307634116113003701200001003004031749300403174930040
120204300392380000018610251201011001200171001200005009600001300203003931748166533167061201002001200002003600003174830039111202011009910010012000010000000007610116113174501200001003004031749300403174930040
12020430039238000000416713251201181001200011001200005009600000300203003931748166533167061201002001200002003600003003931748111202011009910010012000010000000007610116113003701200001003004030041300403004130040
120204300392370000018616713251201181001200181001200005009900000300203003931748149733167061201002001200002003600003174830039111202011009910010012000010000000307610116113174501200001003174930040317493004031749
12020431748225000000610251201001001200001001200005009600000300203003930039149733149971201002001200002003600003003930040111202011009910010012000010000000007610116113003601200001003174930040317493004031749

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | c5 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
12002430048230046025120010101200001012000050960000113002030039300391499631501912001020120000203600003003931010111200211091010120000100001752231121621111300360155120000103004030040300403004030040
12002430039225046025120010101200001012000050960000213002030039300391499631501912001020120000203600003003930039111200211091010120000100100752231111621111300360155120000103004030040309233092330040
12002430039225046025120010101200001012000050960000213002030039300391499631501912001020120000203600003003930039111200211091010120000100000752231211621111300360155120000103004030040300403004030040
12002430039225046025120010101200001012000050960000213002030039300391499631501912001020120000203600003003930039111200211091010120000100000752231111621111300360155120000103004030040300403004030040
12002430039225046025120010101200001012000050960000113002030039300891499631501912001020120000203600003003930039111200211091010120000100000752231111621111300360155120000103004030040300403004030040
12002430039225046025120010101200001012000050960000213002030039300391499631501912001020120000203600003003930039111200211091010120000100000752231111621111300360155120000103004030040300403004030040
120024300392250236025120010101200001012000050960000113002030039300391499631501912001020120000203600003092230039111200211091010120000100000752231111621111300360155120000103004030040300403004030040
12002430039225046025120010101200001012000050960000213002030039300391565231501912001020120000203600003003930039111200211091010120000100000752231111621111300360155120000103004030040300403004030040
12002430041224046025120010101200001012000050990000213002030039300391499631501912001020120000203600003003930039111200211091010120000100000752231111621111300360155120000103004030040300403004030040
12002430039225046025120010101200001012000050960000213002030039300391499631501912001020120000203600003003930039111200211091010120000100000752231111621111300360155120000103004030040300403004030040