Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SQRDMLSH (by element, H)

Test 1: uops

Code:

  sqrdmlsh h0, h1, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | 9e | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
1004303723006125482510001000100039831330183037303724153289510001000300030373037111001100001073116112630100030383038303830383038
1004303723006125482510001000100039831330183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
1004303723006125482510001000100039852630183037303724153289510001000300030373037111001100020373116112630100030383038303830383038
1004303723006125482510001000100039831330183037303724153291410001000300030373037111001100000073116112630100030383038303830383038
1004303723006125482510001000100039831330183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
1004303722006125482510001000100039831330183037303724153289510001000300030373037111001100003073116112630100030383038303830383038
1004303723006125482510001000100039831330183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
1004303722006125482510001000100039831330183037303724153289510001000300030373037111001100000073116112630100030383038303830383038
1004303722006125482510001000100039831330183037303724153289510001000300030373037111001100000373116112630100030383038303830383038
1004303723006125482510001000100039831330183037303724153289510001000300030373037111001100000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  sqrdmlsh h0, h1, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | ac | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020430037224000000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007100216222963400100001003003830038300383003830038
1020430037225000000612954825101001001000010010000500427731313001830037300372826532874510100204100002003000030037300371110201100991001001000010000000007100216222963400100001003003830071300713003830038
1020430037225000000612954825101001031000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000030007101216222963400100001003003830038300383003830038
1020430037225000000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101216222963400100001003003830038300383003830038
1020430037225000000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101216222963400100001003003830038300383003830038
1020430037233000000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101216222963400100001003003830038300383003830038
1020430037225000000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007121216222963400100001003003830038300383003830038
1020430037225000000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101216232963400100001003003830038300383003830038
1020430037225000000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000007101216222963400100001003003830038300383003830038
1020430037224000900612952944101001031000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010020001790007311216222963430100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | a9 | ac | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372250000105529548251001810100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100008340006404162229630010000103003830038300383003830038
100243003722500006129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402163229630010000103003830038300383003830038
1002430037224000010329548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
100243003723300008229548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037225000016629548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037225000016629548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037225100016829548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402163329630010000103003830038300383022830038
1002430037224000012429548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037225000014529548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037233015006129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  sqrdmlsh h0, h0, v1.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | ac | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204300372250024006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071011611296340100001003003830038300383003830038
102043003722500225006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071011611296340100001003003830038300383003830038
1020430037225000006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071011611296340100001003003830038300383003830038
1020430037224006006129548251010010010000100100005004277313130018300843008428265328745101002001000020030000300373003711102011009910010010000100000000071011611296340100001003003830038300383003830038
102043003722500378006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071011611296340100001003003830038300383003830038
102043003722500423006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071011611296340100001003003830038300383003830038
102043003722500324006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071011611296340100001003003830038300383003830038
102043003722400363006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071011611296340100001003003830038300383003830038
10204300372240024006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071011611296340100001003003830038300383003830038
1020430037225000006129548251010010010000100101485004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037225006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001030640216222963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037224006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037224006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
1002430037224006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  sqrdmlsh h0, h1, v0.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
102043003722500612954825101001001000010010000613427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830083
102043003722500612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
1020430037224001052954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
102043003722510612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010001071011611296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
1020430037225006129548251010010010000100100005004277313030018300373003728265328745101002001000020030540300373003711102011009910010010000100022471011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 0a | 0b | 18 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100243003722500000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006403163329630010000103003830038300383003830038
100243003722500000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006403163329630010000103003830038300383003830038
100243003722500000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006403163329630010000103003830038300383003830038
100243003722500000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006403163329630010000103003830038300383003830038
100243003722500100612954825100101010000101000050427731330054300373003728287328767100102010000203000030037300371110021109101010000100106403163329630010000103003830038300383003830038
1002430037225000060822954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006403163329630010000103003830038300383003830038
100243003722500000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006403163329630010000103003830038300383003830038
100243003722500000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006403163329630010000103003830038300383003830038
100243003722500000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006403163329630010000103003830038300383003830038
100243003722500000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006403163329630010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  sqrdmlsh h0, h8, v9.h[1]
  movi v1.16b, 0
  sqrdmlsh h1, h8, v9.h[1]
  movi v2.16b, 0
  sqrdmlsh h2, h8, v9.h[1]
  movi v3.16b, 0
  sqrdmlsh h3, h8, v9.h[1]
  movi v4.16b, 0
  sqrdmlsh h4, h8, v9.h[1]
  movi v5.16b, 0
  sqrdmlsh h5, h8, v9.h[1]
  movi v6.16b, 0
  sqrdmlsh h6, h8, v9.h[1]
  movi v7.16b, 0
  sqrdmlsh h7, h8, v9.h[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 07 | 08 | 0a | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204200891500000000003925801001008000010080000500640000020045200642006401744801002008031320024000020064200641116020110099100100160000100000000135010111216112006101600001002006520065200652006520065
160204200641500000000003925801001008000010080000500640000020045200642006403228010020080000200240000200642006411160201100991001001600001000000103010111116112006101600001002006520065200652006520065
16020420064150000000000392580100100800001008000050064000002004520064200640322801002008000020024000020064200641116020110099100100160000100000000123010111116112006101600001002006520065200652006520065
1602042006415000000090033725801001008000010080000500640000120045200642006403228010020080000200240000200642006411160201100991001001600001000000000010111116112006101600001002006520065200652006520065
160204200641500000000003925801001008000010080000500640000020045200642006403228010020080000200240000200642006411160201100991001001600001000000000010111116112006101600001002006520065200652006520065
160204200641500000000003925801001008000010080000500640000120045200642006403228010020080000200240000200642006411160201100991001001600001000000103010111116112006101600001002006520065200652006520065
160204200641510000000003925801001008000010080000500640000120045200642006403228010020080000200240000200642006411160201100991001001600001000000000010111116112006101600001002006520065200652006520065
160204200641520000000003925801001008000010080000500640000020045200642006403228010020080000200240000200642006411160201100991001001600001000000203010111116112006101600001002006520065200652006520065
160204200641510000000003925801001008000010080000500640000120045200642006403228010020080000200240000200642006411160201100991001001600001000000100010111116112006101600001002006520065200652006520065
160204200641500000000003925801001008000010080000500640000120045200642006403228010020080000200240000200642006411160201100991001001600001000000103010111116112006101600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 0b | 18 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002420075155001206827800121280000128000062640000111020032200512005132280012208000020240000200512005111160021109101016000010371210037162211344226122005724002160000102006120061200612006120061
1600242006015000005167800121280000128000062640000011020041200602006032280012208000020240000200602006011160021109101016000010291051003216926344226112005724002160000102006120061200612006120052
16002420051150000045788001212800001280000626400000110200322006020051322800122080000202400002005120051111600211091010160000102991003413817342211272005724002160000102006120052200612005220052
160024200511500000456480012128000012800006264000011102004120060200513228001220800002024000020051200601116002110910101600001000100341371112521112122004822001160000102005220052200522005220052
16002420051150000045768001212800001280000626400000110200412005120060392280012208000020240000200512006011160021109101016000010001003016918342216112004824002160000102005220061200522006120052
16002420060150000045768001212800001280000626400001102004120060200603228001220800002024000020060200601116002110910101600001000100371692123442212102005724002160000102006120061200612006120061
16002420069150000051578001212800001280000626400000110200412006020060322800122080000202400002006020060111600211091010160000103531003816927344228122005724002160000102006120061200612006120070
160024200601500000516080012128000012800006264000001102004120060200603228001220800002024000020060200601116002110910101600001029310038169212344227122005724002160000102006120061200612006120052
1600242005115000008784800121280000128000062640000011020041200602006032280012208000020240000200602006011160021109101016000010031003416928344221482005724002160000102006120061200612006120052
160024200601500012051758001212800001280000626400000110200412006020060322800122080000202400002006020060111600211091010160000101310039169212344227132005724002160000102006120061200612006120052

Test 6: throughput

Count: 12

Code:

  sqrdmlsh h0, h12, v13.h[1]
  sqrdmlsh h1, h12, v13.h[1]
  sqrdmlsh h2, h12, v13.h[1]
  sqrdmlsh h3, h12, v13.h[1]
  sqrdmlsh h4, h12, v13.h[1]
  sqrdmlsh h5, h12, v13.h[1]
  sqrdmlsh h6, h12, v13.h[1]
  sqrdmlsh h7, h12, v13.h[1]
  sqrdmlsh h8, h12, v13.h[1]
  sqrdmlsh h9, h12, v13.h[1]
  sqrdmlsh h10, h12, v13.h[1]
  sqrdmlsh h11, h12, v13.h[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 1e | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1202043011323100410251201001001200011001200005009600000300203003930039149733149971201002001200002003600003004230039111202011009910010012000010030761011611300361200001003004030040300403004030043
1202043003922500410251201001001200011001200005009600000309243003930039149733149971201002001200002003600003003930042111202011009910010012000010000761011611300361200001003004030040300403004330040
120204300422250534102512010110012000010012000050042895390300203003930042149733149971201002001200002003600003003930943111202011009910010012000010000761011611300361200001003004130040300403004030040
12020430039225014134892512010010012000110012000050042836220300233003930039149733149971201002001200002003600003003930039111202011009910010012000010000761011611300361200001003004030043300403004030944
1202043003922501410251201001001200011001200005009600000300203004230039149733159011201002001200002003600003004230039111202011009910010012000010000761011611300391200001003004030040300433004030043
1202043003922500410251201001001200001001200005009600000300203004230039149733150001201002001200002003600003004230039111202011009910010012000010000761011611300361200001003004030042300403004030043
1202043003922400440251201001001200001001200005009600000300203004230039149733150001201002001200002003600003004230922111202011009910010012000010000761011611300361200001003004030040300403004330040
1202043004222500610251201001001200531001200005009600001300233003930039149733149971201002001200002003600003094330039111202011009910010012000010000761011611300391200001003004030040309443004030944
1202043003922500410251201001001200531001200005009900000309243003930039149733149971201002001200002003600003004230039111202011009910010012000010000761011611300361200001003004030040300403070230043
1202043003922501410251201001001200011001200005009600000300233003930039149733149971201002001200002003600003174830039111202011009910010012000010000761011611300361200001003004030040300403004030040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 08 | 18 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c5 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
120024300482251000046025120010101200001012000050960000113002030039300391499603150191200102012000020360000300393003911120021109101012000010100752231181621155300360155120000103004030040300403004030040
120024300392250000046025120010101200001012000050960000113002030922300391499603150191200102012000020360000300393003911120021109101012000010400752231171621188300360155120000103004030040300403004130040
1200243003922500000460251200101012000010120000509600000130020300393003914996031501912001020120000203600003003930039111200211091010120000100017524622516422553003603010120000103004030040300403004030802
1200243003922500001526695251200101012000010120000509600000130020300393003914996031501912001020120000203600003003930039111200211091010120000102007524622817422693003603010120000103004030040300403174430040
12002430039225000001150251200101012000010120000509600000130020300393003914996031501912001020120000203600003003930039111200211091010120000108607524622616422673003603110120000103004030040300403004130040
1200243003922500000520251200101012000010120112509600000130020301983092214996031501912001020120000203600003003930039111200211091010120000100007524622716322753003603010120000103004030040300403004830040
1200243003922500000520251200101012000010120000509600000130020300393003914996031501912001020120000203600003003930039111200211091010120000102007524622616422653003603010120000103004030040300403004030040
1200243003922500000520251200101012000010120000509600000130020300393003914996031502212001020120000203600003003930039111200211091010120000108007524622616422763003603010120000103004030040300403009130040
1200243003922400000520251200281012000010120000559600000130020300393003914996031501912001020120000203600003003930039111200211091010120000104007524622616422563003603110120000103004030040300403004030040
1200243003922500000526695251200101012000010120000509600000130020300393003914996031502212001020120000203600003003930039111200211091010120000100007524622816422663003603110120000103004030040300403004330040