Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SQRDMLSH (by element, 2S)

Test 1: uops

Code:

  sqrdmlsh v0.2s, v1.2s, v2.s[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)031e3f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110001073116112630100030383038303830383038
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110001073116112630100030383038303830383038
100430372306125482510001000100039831303018303730372415328951149100030003037303711100110001373116112630100030383038303830383038
100430372336125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110005073116112630100030383038303830383038
1004303723025125482510001000100039831313018303730372415328951000100030003037303711100110001073116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  sqrdmlsh v0.2s, v1.2s, v2.s[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)03080b18191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a6a7a8a9acc2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102043003722500307506129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000200071031622296340100001003003830038300383003830038
10204300372240000006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000100071021622296340100001003003830038300383003830038
10204300372250000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000600071021622296340100001003003830038300383003830038
102043003722500000061295482510100100100001001000053342773130300183003730037282653287451010020010000200300003003730037111020210099100100100001000002120071021622296340100001003003830038300383003830038
10204300372250000006129548251010010010000100100005114277313030018300373003728265328764101002001000020030000300373003711102011009910010010000100000503071021622296340100001003003830038300383003830038
102043003722500000061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000003200071021622296340100001003003830038300383003830038
102043003722500000018929548251010010010000100100005004277313030018300373003728265328745101002121000020030000300373003711102011009910010010000100000600077921622296340100001003003830038300383003830038
102043003722500001206129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373022711102011009910010010000100000700071021622296340100001003003830038300383003830038
102043003722500000061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000004084271021622296340100001003003830038300383003830038
10204300372250000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000100071021622296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)0318193f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8a9acc2c5cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100243003722500612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010330000640216222963010000103003830038300383003830038
100243003722500612954825100101010000101014950427731313001830037300372828732876710010201000020300003003730037111002110910101000010008400640216222963010000103003830038300383003830038
100243003722500612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010530600640216222963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100017100640216222963010000103003830038300383003830038
10024300372250061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
10024300372250061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
10024300372330061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
10024300372250061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
100243003722500612954825100101010032101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010006900640216222963010000103003830038300383003830038
10024300372241161295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  sqrdmlsh v0.2s, v0.2s, v1.s[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)031e3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc2cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102043003722500233295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161029634100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
102043003722500145295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
10204300372250084295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000007101161129634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)0308091e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a8accdcfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1002430037225000010329548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
1002430037225000053829548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
1002430037225000037929548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
1002430037224000046629548251001010100001010000504277313301263003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
1002430037225000036729548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
1002430037225006019129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000028410640216222963010000103003830038300383003830038
1002430037225000010329548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
1002430037225000048029548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000001640216222963010000103003830038300383003830038
1002430037224000016629548251001010100001010000504277313300183003730037282873287671001022100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
1002430037225000061729539251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  sqrdmlsh v0.2s, v1.2s, v0.s[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)030818191e1f3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a8a9acc2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204300372240000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071021622296340100001003003830038300383003830038
102043003722500000010329548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000200071021622296340100001003003830038300383003830038
10204300372250000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071021622296340100001003003830038300383003830038
10204300372250000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071021622296340100001003003830038300383003830038
10204300372250000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071021622296340100001003003830038300383003830038
10204300372240000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003721102011009910010010000100000000071021622296340100001003003830038300383003830038
10204300372250000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071021622296340100001003003830038300383003830038
10204300372250000006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071021622296340100001003003830038300383003830038
10204300372250000006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071021622296340100001003003830038300383003830038
1020430037225000000296229548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000071021622296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)031e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1002430037225061295482510010101000010100005042773131300183003730037282870328767100102010000203000030037300371110021109101010000100006844162229630210000103003830038300383003830038
10024300372250726295482510010101000010100005042773131300183003730037282870328767100102010000203000030037300371110021109101010000100006403164329630010000103003830038300383003830038
10024300372250536295482510010131001610100005042773131300183003730037282870328767100102010000203000030037300371110021109101010000100006403164429630010000103003830038300383003830038
1002430037225061295482510010101000010100005042773131300183003730037282870328767100102010000203000030037300371110021109101010000100006403164329630010000103003830038300383003830038
1002430037225061295482510010101000010100005042773131300183003730037282870328767100102010000203000030037300371110021109101010000100006404164329630010000103003830038300383003830038
10024300372250251295482510010101000010100005042773130300183003730037282870328767100102010000203000030037300371110021109101010000100006404164329630010000103003830038300383003830038
10024300372240536295482510010101000010100005042773130300183003730037282870328767100102010000203000030037300371110021109101010000100006402163329630010000103003830038300383003830038
10024300372250441295482510010101000010100005042773131300183003730037282870328767100102010000203000030037300371110021109101010000100006403164329630010000103003830038300383003830038
1002430037225061295482510010101000010100005042773130300183003730037282870328767100102010000203000030037300371110021109101010000100006404164329630010000103003830038300383003830038
10024300372240612954825100101010000101000050427731313001830037300372828703287671001020100002030000300373003711100211091010100001012006403163429630010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  sqrdmlsh v0.2s, v8.2s, v9.s[1]
  movi v1.16b, 0
  sqrdmlsh v1.2s, v8.2s, v9.s[1]
  movi v2.16b, 0
  sqrdmlsh v2.2s, v8.2s, v9.s[1]
  movi v3.16b, 0
  sqrdmlsh v3.2s, v8.2s, v9.s[1]
  movi v4.16b, 0
  sqrdmlsh v4.2s, v8.2s, v9.s[1]
  movi v5.16b, 0
  sqrdmlsh v5.2s, v8.2s, v9.s[1]
  movi v6.16b, 0
  sqrdmlsh v6.2s, v8.2s, v9.s[1]
  movi v7.16b, 0
  sqrdmlsh v7.2s, v8.2s, v9.s[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01)cycle (02)03181e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a8a9acc5branch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16020420089150002925801161008001610080028500640196020045200652006561280128200800282002400842006520065111602011009910010016000010000001111011961600200621600001002006620066200662006620066
160204200651500362925801161008001610080028500640196120045200652006561280128200800282002400842006520065111602011009910010016000010000001111011901600200621600001002006620066200662006620066
160204200651510152925801991008001610080028500640196020045200652006561280128200800282002400842006520065111602011009910010016000010000001111011901600200621600001002006620066200662006620066
160204200651501332925801161008001610080028500640196020045200652006561280128200800282002400842006520065111602011009910010016000010000001111011901600200621600001002006620066200662006620066
160204200651500122925801161008001610080028500640196020045200652006561280128200800282002400842006520065111602011009910010016000010000001111011901600200621600001002006620066200662006620066
16020420065150002925801161008001610080028500640196020045200652006561280128200800282002400842006520065111602011009910010016000010020001111011901600200621600001002006620066200662006620066
16020420065151002925801161008001610080028500640196020045200652006561280128200800282002400842006520065111602011009910010016000010000001111011901600200621600001002006620066200662006620066
16020420065150002925801161008001610080028500640196020045200652006561280128200800282002400842006520065111602011009910010016000010000001111011901600200621600001002006620066200662006620066
16020420065150002925801161008001610080028500640196020045200652006561280128200800282002400842006520065111602011009910010016000010000001111011901600200621600001002006620066200662006620066
160204200651500362925801161008001610080028500640196020045200652006561280128200800282002400842006520065111602011009910010016000010000001111011901600200621600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01)cycle (02)03191e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a7a8a9acc2cfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)eaebec? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160024200771500045278001212800001280000626400001102003220051200513228001220800002024000020051200511116002110910101600001000000010027821925211642004822001160000102005220052200522005220052
1600242005115000452780012128000012800006264000011520032200512005132280012208000020240000200512005111160021109101016000010000000100261121625211552004822001160000102005220052200522005220052
160024200511510045278001212800001280000626400001152003220051200513228001220800002024000020051200511116002110910101600001000000010028821425211542004822001160000102005220052200522005220052
160024200511500045278001212800001280000626400001152003220051200513228001220800002024000020051200511116002110910101600001000000010030821725211542005722001160000102005220052200522005220052
160024200511500045278001212800001280000626400001152003220051200513228001220800002024000020051200601116002110910101600001000000010030831325211662004822001160000102005220052200522005220052
160024200511500045278001212800001280000626400001152003220051200513228001220800002024000020051200511116002110910101600001000000010029821425211442004822001160000102005220052200522005220052
1600242005115000520278001212800001280000626400001152003220051200513228001220800002024000020051200511116002110910101600001000000010030821425211552004822001160000102005220052200522005220052
1600242005115000452780012128000012800006264000011520032200512005132280012208000020240000200512005111160021109101016000010000000100321121425211642004822001160000102005220052200522005220052
160024200511500045278001212800001280000626400001152003220051200513228001220800002024000020051200511116002110910101600001000000010028821425211562004822001160000102006120052200522005220052
160024200511500045278001212800001280000626400001152003220051200513228001220800002024000020051200511116002110910101600001000000010029821425211562004822001160000102005220052200522005220052

Test 6: throughput

Count: 12

Code:

  sqrdmlsh v0.2s, v12.2s, v13.s[1]
  sqrdmlsh v1.2s, v12.2s, v13.s[1]
  sqrdmlsh v2.2s, v12.2s, v13.s[1]
  sqrdmlsh v3.2s, v12.2s, v13.s[1]
  sqrdmlsh v4.2s, v12.2s, v13.s[1]
  sqrdmlsh v5.2s, v12.2s, v13.s[1]
  sqrdmlsh v6.2s, v12.2s, v13.s[1]
  sqrdmlsh v7.2s, v12.2s, v13.s[1]
  sqrdmlsh v8.2s, v12.2s, v13.s[1]
  sqrdmlsh v9.2s, v12.2s, v13.s[1]
  sqrdmlsh v10.2s, v12.2s, v13.s[1]
  sqrdmlsh v11.2s, v12.2s, v13.s[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2503

retire (01)cycle (02)030b18191e1f373a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a6a8a9acc2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
120204300572250000000610251201171001200001001200005009600000300200300393004214973314997120100200120000200360000300393004211120201100991001001200001000000007610116113003601200001003004030043300403004030040
120204300392250002970008302512010110012000012012000050010126400300200300393004214973315901120100200120000200360000300423003911120201100991001001200001000020007610116113003601200001003004030040300403092330040
120204300392250000000410251201001001200001001200005009600000300203309433003915827314997120100200120000200360000300393003911120201100991001001200001000000007610116113003601200001003004330040300403094430040
120204309432250006000610251201001001200011001200005009600001300230300393004114973314997120100200120000200360000300393004211120201100991001001200001000000007610116113016001200001003004030040309443004030944
12020430039225000348000610251201001001200171001200005009600001309030300393004014973314997120100200120000200360000300413003911120201100991001001200001000000007610116113003601200001003004330040300403004330040
12020430042225000375000410251201001001200001001200005009600000300200300393094314973315000120100200120000200360000300423003911120201100991001001200001000000007610116113003601200001003004330040300403092330040
12020430042225000324000610251201001001200011001200005009600001300230300393003914973315000120100200120000200360000300423003911120201100991001001200001000000007610116113003601200001003004030040309443004030923
120204300392250000000440251201001001200011001200005009600000300230300393004214973315000120100200120000200360000300393004211120201100991001001200001000000007610116113003601200001003004030043300403004030040
12020430039225000369000440251201001001200531001200005009600001309030300393003914973314997120100200120000200360000300393094311120201100991001001200001000000007610116113094001200001003004030040300403094430040
12020430039225000357000410251201001001200011001200005009600001300230300393003914973314997120100200120000200360000309433003911120201100991001001200001000000007610116113003601200001003004030043300403004030040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01)cycle (02)03080b18191e1f373f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a7a9acc2cfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)eaeb? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1200243004822500007800236251200101012000010120000509600001130020300393003914996315019120010201200002036000030039300391112002110910101200001000000752231191621168300360155120000103004030040300403004030040
120024300392250000138003372512001010120000101200005096000001300203003930039149963150191200102012000020360000300393003911120021109101012000010000007524652516422553003603110120000103004030040300403004030042
120024300392250000000522512001010120000101200005096000001300203003930039149963150191200102012000020360000300393003911120021109101012000010000007524311716411553091903010120000103004030040300403004030040
12002430039225000012900462512001010120000101200005096000011300203003930039158503150191200102012000020360000300393003911120021109101012000010000007524622616322773003601510120000103004030040300403004030040
12002430039225000010500522512001010120000101200005096000001300203003930039149963150191200102012000020360000300393003911120021109101012000010000007524622516322773003603010120000103004030040300403004030040
1200243003922500001080052251200101012000010120000509600000130020300393003914996315019120010201200002036000030039300391112002110910101200001000000752232181621156300360155120000103004030040300403004030040
12002430089224000011400522512001010120000101200005096000001317293003930039149963150191200102012000020360000300393003911120021109101012000010000007524622516422753003603010120000103004030040300403004030040
12002430039225000033600522512001010120000101200005096000001300203003930039149963150191200102012000020360000300393003911120021109101012000010000007524652516322893003603010120000103004030040300403004030040
12002430039225000010800522512001010120000101200005096000001300203003930039149963150191200102012000020360000300393003911120021109101012000010000007524652716422983003603010120000103004030040300403004030040
1200243003922500009000522512001010120000101200005096000001300203003930039149963150191200102012000020360000300393003911120021109101012000010000007524622816422653003603010120000103004030040300403004030040