Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SQRDMLSH (by element, 4H)

Test 1: uops

Code:

  sqrdmlsh v0.4h, v1.4h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)031e3f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
10043037230612548251000100010003983133018303730372415328951000100030003037303711100110000073216112630100030383038303830383038
10043037230612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372302512548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037220612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037230612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037230612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037229612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037230612548251000100010003983133018303730372417629131000100030003037303711100110000073116112630100030383038303830383038
100430372312822548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037230612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  sqrdmlsh v0.4h, v1.4h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)031e3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a7a8acc2c5branch mispredict (cb)cdcfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102043003722500612954825101001001000010010000500427731303001830037300372827262874010100200100082003002430037300371110201100991001001000010000000111717001600296460100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731303001830037300372827272874110100200100082003002430037300371110201100991001001000010000000111717001600296470100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000710121622296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731303001830037300372826532874510100200100002003051630037300371110201100991001001000010000000000710121622296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000710121623296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000710121622296340100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000710121622296340100001003003830038300383003830038
102043003722400612954825101001001000010010000500427731303001830037300372826532874510100200100002023000030037300371110201100991001001000010000000000710121623297060100001003003830038300383003830038
102043003722500612954825101001001000010010000500427731313001830037300862827032874510100200100002083049230133301322110201100991001001000010021000000739121623296340100001003003830038300383003830038
102043003722400612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000710121622296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)03080a0b18191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a8a9acc2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100243003722500000390726295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000006402162229630010000103003830038300383003830038
1002430037225000000061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000006402162229630010000103003830038300383003830038
1002430037225000003613261295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000006402162229630010000103003830038300383003830038
1002430037225000000061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000006402162229630010000103003830038300383003830038
1002430037225000000061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000006402162229630010000103003830038300383003830038
1002430037225000000061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000006402162229630010000103003830038300383003830038
1002430037225000000061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000006402162329630010000103003830038300383003830038
1002430037224000000061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000006402162229630010000103003830038300383003830038
1002430037225000000061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000006402162229630010000103003830038300383003830038
1002430037225000000061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000006402162229630010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  sqrdmlsh v0.4h, v0.4h, v1.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)0318191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a8a9acc2c5cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102043003722500006129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730085111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722500006129548251013410010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
1020430037225000012412953968101371181000811710149653427867030054301323013328271112878210421202103302063100230131301323110201100991001001000010020202278520757225112970623100001003013430134300853008630038
102043003722512162886129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011610296340100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)031e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024300372250612954825100101010000101000050427731313001830037300372828703287671001020100002030000300373003711100211091010100001000640416442963010000103003830038300383003830038
10024300372240612954825100101010000101000050427731303001830037300372828703287671001020100002030000300373003711100211091010100001000640316442963010000103003830038300383003830038
10024300372250612954825100101010000141000050427731303001830037300372828703287671001020100002030000300373008411100211091010100001000640416342966810000103003830038300383003830038
10024300372250612954825100101010000101000050427731313001830037300372828703287671001020100002030000300373003711100211091010100001000640416342963010000103003830038300383003830038
10024300372250612954825100101010000101000050427731313001830037300372828703287671001020100002030000300373003711100211091010100001000640316442963010000103003830038300383003830038
100243003722401562954825100101010000101000050427731303001830037300372828703287671001020100002030000300373003711100211091010100001000640416442963010000103003830038300383003830038
100243003722504232954825100101010000101000050427731303001830037300372828703287671001020100002030000300373003711100211091010100001000640416432963010000103003830038300383003830038
100243003722507262954825100101010000101000050427731303001830037300372828703287671001020100002030000300373003711100211091010100001000640416442963010000103003830038300383003830038
10024300372250612954825100101010000101000050427731303001830037300372828703287671001020100002030000300373003711100211091010100001000640416442963010000103003830038300383003830038
100243003722507262954825100101010000101000050427731303001830037300372828703287671001020100002030000300373003711100211091010100001000640316432963010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  sqrdmlsh v0.4h, v1.4h, v0.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)03080b18191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9facbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1020430037225000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
10204300372250000010329548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129768100001003003830038300383003830038
1020430037225010006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
1020430037225000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
1020430037224000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003008530038300383003830038
1020430037225000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161229670100001003003830038300383013230134
1020430084226012206129548251010010010000100101495874278670030054300373008428265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
1020430037225000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
1020430037225000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
1020430037225000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)03091e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a7a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10024300372250072629548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000640216222963010000103003830038300383003830038
1002430037225106129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000640216222963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100010640216222963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000640216222963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000640216222963010000103003830038300383003830038
1002430037225006129539251001010100071010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000640216222963010000103003830038300383003830038
1002430084225006129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000640216222963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000640216222963010000103003830038300383003830038
1002430037225008429548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000640216222963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000640216222963010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  sqrdmlsh v0.4h, v8.4h, v9.h[1]
  movi v1.16b, 0
  sqrdmlsh v1.4h, v8.4h, v9.h[1]
  movi v2.16b, 0
  sqrdmlsh v2.4h, v8.4h, v9.h[1]
  movi v3.16b, 0
  sqrdmlsh v3.4h, v8.4h, v9.h[1]
  movi v4.16b, 0
  sqrdmlsh v4.4h, v8.4h, v9.h[1]
  movi v5.16b, 0
  sqrdmlsh v5.4h, v8.4h, v9.h[1]
  movi v6.16b, 0
  sqrdmlsh v6.4h, v8.4h, v9.h[1]
  movi v7.16b, 0
  sqrdmlsh v7.4h, v8.4h, v9.h[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01)cycle (02)030818191e1f3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f696b6d6e74scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a8a9acc2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)eb? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160204200901560000039258012410080000100800005006400000200450200642006403228010020080000200240000200642006411160201100991001001600001000000000101111161120061001600001002006520065200652006520065
1602042006415000033039258010010080000100800005006400000200450200642006403228010020080000200240000200642006411160201100991001001600001000008030101441161120061001600001002006520065200652006520065
1602042006415100000514258010010080000100800005006400000200450200642006403228010020080000200240000200642006411160201100991001001600001000000000101111161120061001600001002006520065200652006520065
160204200641500000039258010010080000100800005006400000200450200642006403228010020080000200240000200642006411160201100991001001600001000003000101111161120061001600001002006520065200652006520065
160204200641510110039258010010080000100800005006400000200450200642006403228010020080000200240000200642006411160201100991001001600001000005000101111161120061001600001002006520065200652006520065
160204200641500000039258010010080000100800005006400000200450200642006403228010020080000200240000200642006411160201100991001001600001000000000101111161120061001600001002006520065200652006520065
1602042006415000000392580100100800001008000050064000002004502006420064032280100200800002002400002006420064111602011009910010016000010000000301011111611200611601600001002006520065200652006520065
160204200641500000039258010010080000100800005006400000200453200642006403228010020080000200240000200642006411160201100991001001600001000007030101111161120061001600001002006520065200652006520065
1602042006415000000392580100100800001008000050064000002004502006420064032280100200800002002400002006420064111602011009910010016000010000052019730101111161120061001600001002006520065200652006520065
160204200641500000039258010010080000100800005006400000200450200642006403228010020080000200240000200642006411160201100991001001600001000004000101111161120061001600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01)cycle (02)030b1e3a3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)ea? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1600242007315000045258001212800001280000626400000152003120050200503228001220800002024000020050200501116002110910101600001013100428312123211242320043215160000102004720047200472004720047
1600242004615000045258001212800001280000626400001152002720046200463228001220800002024000020046200461116002110910101600001000100498312420211242320043215160000102004720047200472004720047
1600242004615008104525800121280000128000062640000115200272004620046322800122080000202400002004620046111600211091010160000100129100468312420211242220043215160000102004720047200472004720047
1600242004615000045258001212800001280000626400001152002720046200463228001220800002024000020046200461116002110910101600001000100468312320211252520043215160000102004720047200472004720047
1600242004615000045258001212800001280000626400001152002720046200463228001220800002024000020046200461116002110910101600001000100468312520211222320043215160000102004720047200472004720047
1600242004615000045258001212800001280000626400001152002720046200463228001220800002024000020046200461116002110910101600001000100468312320211222320043215160000102004720047200472004720047
16002420046150000520258001212800001280000626400001152002720046200463228001220800002024000020046200461116002110910101600001000100478312420211241820043215160000102004720047200472004720047
1600242004615000045258001212800001280000626400001152002720046200463228001220800002024000020046200461116002110910101600001000100468312420211232420043215160000102004720047200472004720047
1600242004615000045258001212800001280000626400001152002720046200463228001220800002024000020046200461116002110910101600001000100478312420211232420043215160000102004720047200472004720047
1600242004615000045258001212800001280000626400001152002720046200463228001220800002024000020046200461116002110910101600001000100508312420211222220043215160000102004720047200472004720047

Test 6: throughput

Count: 12

Code:

  sqrdmlsh v0.4h, v12.4h, v13.h[1]
  sqrdmlsh v1.4h, v12.4h, v13.h[1]
  sqrdmlsh v2.4h, v12.4h, v13.h[1]
  sqrdmlsh v3.4h, v12.4h, v13.h[1]
  sqrdmlsh v4.4h, v12.4h, v13.h[1]
  sqrdmlsh v5.4h, v12.4h, v13.h[1]
  sqrdmlsh v6.4h, v12.4h, v13.h[1]
  sqrdmlsh v7.4h, v12.4h, v13.h[1]
  sqrdmlsh v8.4h, v12.4h, v13.h[1]
  sqrdmlsh v9.4h, v12.4h, v13.h[1]
  sqrdmlsh v10.4h, v12.4h, v13.h[1]
  sqrdmlsh v11.4h, v12.4h, v13.h[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2503

retire (01)cycle (02)03080b18191e1f373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a8a9acc2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1202043014222500000004102512010110012000010012000050096000013002030039300391497331499712010020012000020036000030039300391112020110099100100120000100000100907610216113003601200001003004030040300403004030043
12020430922225000033004102512010110012000010012000050096000013002030039300391497331499712010020012000020036000030039300391112020110099100100120000100000001807610116113003601200001003004030040300403004030040
12020430039225000000041025120168100120000100120000500960000130020300393003914973314997120100200120000200360000300393003911120201100991001001200001000000014107610116113003601200001003004030040300403004030040
1202043003922500000004102512010010012000010012000050096000013002030039300391497331499712010020012000020036000030039300391112020110099100100120000100000380007610116113003601200001003004030040300403004030040
1202043003922500000004102512010010012000010012000050096000013002030039300391497331499712010020012000020036000030039300391112020110099100100120000100000340007610116113003601200001003004030040300403004030040
1202043003922500000004102512010010012000110012000050096000013002030039300391497331499712010020012000020036000030039300391112020110099100100120000100000370205607610116113003601200001003004030040300403004030923
120204300392240000000410251201011001200001001200005009600001300203003930039149733149971201002001200002003600003003930039111202011009910010012000010000000607610116113003601200001003004030040300403004031775
1202043003922500000004102512010110012000010012000050096000013002030039300391497331499712010020012000020036000030039300391112020110099100100120000100000340007610116113003601200001003004030040300403004031751
1202043003922500000004102512010010012000010012000050096000013002030039300391497331499712010020012000020036000030039300391112020110099100100120000100000260607610116113003601200001003004030040300403004030043
120204300392240000000410251201011001200011001200005009600001317553003930039149733149971201002001200002003600003003930039111202011009910010012000010000050307610116113003601200001003004030040300403004030040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01)cycle (02)03081e373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a8accdcfd0icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1200243036722502104002512001210120000101200005096000001300203003930039149963150191200102012000020360000300393003911120021109101012000010001207520002216000191930038120000103004030040300903004030043
1200243003922500040025120029101200001012000050960000013002030039300391499631501912001020120000203600003003930039111200211091010120000100299075200081600091930036120000103004030040317513004030040
12002430039225000400251200121012003410120000509600000030020300393003914996315019120010201200002036000030039300391112002110910101200001000607520001916000191930036120000103004030040300403004030040
1200243003922501650400251200121012000010120000509600000030020300393003914996315019120010201200002036000030039300391112002110910101200001000420752000191600091930036120000103004030040300403004030040
12002430039225000400251200121012000010120000509600000130020300393003914996315019120010201200002036000030039300391112002110910101200001000330752000191600091930036120000103004030040300403004030106
12002430039225000400251200121012000010120000509600000130020300393003914996315019120010201200002036000030039300391112002110910101200001007607520001916000191930036120000103004030040300423004030040
12002430039225000610251200121012000010120000509600000030020300393003914996315019120010201200002036000030039300391112002110910101200001009607520001616000191930036120000103004030040300403004030040
120024300392250004002512001210120000101200005096000001300203003930039149963150191200102012000020360000300393004111120021109101012000010001050752031162820019730036120000103004030040300403004030040
120024300392250004002512002810120000101200005096000001300203003930039166773150191200102012000020360000300393003911120021109101012000010001140752000191600091930036120000103004030040300403004030040
12002430039225012046025120012101200001012000050960000013002030039300391499631501912001020120000203600003003930039111200211091010120000100090752000191600091930036120000103004030040300403004031751