Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SQDMLAL (by element, 2D)

Test 1: uops

Code:

  sqdmlal v0.2d, v1.2s, v2.s[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)031e3f4e51inst issue (52)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a8accfd5d6ddinst fetch restart (de)e0? fp/simd (ee)f5f6f7f8fd
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372208225482510001000100039831303018303730372415328951000100030003037303711100110001073116112630100030383038303830383038
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110000373116112630100030383038303830383038
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  sqdmlal v0.2d, v1.2s, v2.s[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)031e3a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8accfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
10204300372250061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000710121622296340100001003003830038300383003830038
10204300372240061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000710121622296340100001003003830038300383003830038
10204300372240061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000710121622296340100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000710121622296340100001003003830038300383003830038
102043003722500798295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000712121622296340100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000710121632296340100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000712121622296341100001003003830133300383003830038
10204300372250061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000710121622296340100001003003830038300383003830038
10204300372250061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000710131622296340100001003003830038300383003830038
10204300372250061295482510100100100081001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000710121622296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)0308090b18191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a8accdcfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1002430037225000000156295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640416332963010000103003830038300383003830038
100243003722500000061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003721100211091010100001000030640224432963010000103003830038300383008630038
1002430037225001000346295482510010101000010100005042773131300183003730084282873287671001020103292030000300373003711100211091010100001000000640216332966810000103003830038300383003830038
1002430084225000006726295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000640224232963010000103003830038300383003830038
100243003722500001061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640316222963010000103003830038300383003830038
1002430037224000000115295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
100243003722500000061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640316322963010000103003830038300383003830038
100243003722500000061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640216332963010000103003830038300383003830038
100243003722400000061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640316232963010000103003830038300383003830038
100243003722500000061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640216332963010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  sqdmlal v0.2d, v0.2s, v1.s[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)0318191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1020430037225020612954825101001001000010010000500427731313001830037300372826532874510100200101612003000030037300371110201100991001001000010007101161129634100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010007101161129634100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010007101161129634100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010007101161129634100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010007101161129634100001003003830038300383003830038
10204300372250151612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010007101161129634100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010007101161129634100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010007101161129634100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010007101161129634100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010007101161129634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)03181e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)6061696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a6accfd0d5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1002430037225006129548251001010100001010000504277313010300183003730037282873287671001020100002030000300373003711100211091010100001000064010316332963010000103003830038300383003830038
1002430037225045612954825100101010000101000050427867010300183003730037282873287671001020100002030000300373003711100211091010100001000064010316332963010000103003830038300383003830038
10024300372250061295482510010101000010100005042773131030018300373003728287328767100102010000203000030037300371110021109101010000100006400316332963010000103003830038300383003830038
10024300372250061295482510010101000010100005042773131030018300373003728287328767100102010000203000030037300371110021109101010000100006400316332963010000103003830038300383003830038
1002430037225423161295482510010101000010100005042773130830018300373003728287328767100102010163203000030037300371110021109101010000100006408316332963010000103003830038300383003830038
10024300372250061295482510010101000010100005042773130830018300373003728287328767100102010180203000030037300371110021109101010000100006400316322963010000103003830038300383003830038
10024300372250061295482510010101000010100005042773130830018300373003728287328767100102010000203000030037300371110021109101010000100006400316332963010000103003830038300383003830038
10024300372250061295482510010101000010100005042773131830018300373003728287328767100102010000203000030037300371110021109101010000100006408316332963010000103003830038300383003830038
10024300372240061295482510010101000010100005042773131830018300373003728287328767100102010000203000030037300371110021109101010000100006408316332963010000103003830038300383003830038
100243003722504861295482510010101000010100005042773131830018300373003728287828767100102010000203000030037300371110021109101010000100006408316332963010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  sqdmlal v0.2d, v1.2s, v0.s[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)0318191e3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8cdcfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
102043003722500061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
1020430037225000251295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
1020430037225003361295482510100100100001001000050042773131300183003730037282653287451010020410000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
102043003722500061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
102043003722500061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
102043003722500061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011601296340100001003003830038300383003830038
102043003722500961295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
102043003722400061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
102043003722400061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
102043003722500061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01)cycle (02)0318191e1f3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
100243003722500006129548251001010100001010000504277313130018300373003728287032876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
100243003722540342072629548251001010100001010000504277313030018300373003728287032876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225000025129548251001010100001010000504277313030018300373003728287032876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
100243003722500006129548251001010100001010000504277313130018300373003728287032876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
100243003722500006129548251001010100001010000504277313030018300373003728287032876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225000061295482510010101000010100005042773130300183003730037282870122876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
100243003722500006129548251001010100001010000504277313030018300373003728287032876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037225001506129548251001010100001010000504277313130018300373003728287032876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
100243003722500006129548251001010100001010000504277313130018300373003728287032876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
100243003722500006129548251001010100001010000504277313130018300373003728287032876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  sqdmlal v0.2d, v8.2s, v9.s[1]
  movi v1.16b, 0
  sqdmlal v1.2d, v8.2s, v9.s[1]
  movi v2.16b, 0
  sqdmlal v2.2d, v8.2s, v9.s[1]
  movi v3.16b, 0
  sqdmlal v3.2d, v8.2s, v9.s[1]
  movi v4.16b, 0
  sqdmlal v4.2d, v8.2s, v9.s[1]
  movi v5.16b, 0
  sqdmlal v5.2d, v8.2s, v9.s[1]
  movi v6.16b, 0
  sqdmlal v6.2d, v8.2s, v9.s[1]
  movi v7.16b, 0
  sqdmlal v7.2d, v8.2s, v9.s[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01)cycle (02)03070818191e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f60696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a8acc5cdcfd5d6ddinst fetch restart (de)e0? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16020420089150000021392580100100800001008000050064000000200452006420064322801002008000020024000020064200641116020110099100100160000100000001011111611200611600001002006520065200652006520065
1602042006415000000392580100100800001008000050064000000200452006420064322801002008000020024000020064200641116020110099100100160000100000001011111611200611600001002006520065200652006520065
1602042006415000000392580100100800001008000050064000000200452006420064322801002008000020024000020064200641116020110099100100160000100000001011111611200611600001002006520065200652006520065
1602042006415000000392580100100800001008000050064000001200452006420064322801002008000020024000020064200641116020110099100100160000100000001011111611200611600001002006520065200652006520065
1602042006415000000392580100100800001008000050064000001200452006420064322801002008000020024000020064200641116020110099100100160000100000001011111611200611600001002006520065200652006520065
1602042006415100000392580100100800001008000050064000000200452006420064322801002008000020024000020064200641116020110099100100160000100000001011111611200611600001002006520065200652006520065
1602042006415100000392580100100800001008000050064000001200452006420064322801002008000020024000020064200641116020110099100100160000100000001011111611200611600001002006520065200652006520065
16020420064150000018392580100100800001008000050064000001200452006420064322801002008000020024000020064200641116020110099100100160000100000001011111611200611600001002006520065200652006520065
1602042006415010003392580100100800001008000050064000001200452006420064322801002008000020024000020064200641116020110099100100160000100000001011111611200611600001002006520065200652006520065
160204200641510000213392580100100800001008000050064000000200452006420064322801002008000020024000020064200641116020110099100100160000100300001011111611200611600001002006520065200652006520145

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01)cycle (02)0304191e1f3a3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f6061696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a8a9accfd0d2icache miss (d3)d5d6d9dadbddinst fetch restart (de)e0? int output thing (e9)ea? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1600242006715010177005125800121280000128000062640000115200332005020050322800122080000202400002004620050111600211091010160000100000100468312222422171820049230160000102005320047200472004720051
16002420050150000005125800121280000128000062640000115200332005020050322800122080000202400002005020052111600211091010160000100000100448311824222191420047230160000102004720051200532005120049
1600242004815000000514258001212800001280000626400001152003120050200463228001220800002024000020050200501116002110910101600001000001004611311826412192120047230160000102005120051200512004920051
160024200481500000045258001212800001280000626400000152002720050200463228001220800002024000020050200461116002110910101600001000001004011311820412171720047231160000102005320047200512004720051
160024200501500000051258001212800001280000626400000152003320052200463228001220800002024000020046200461116002110910101600001000001004111311826422171720047230160000102005120047200512005120051
160024200501500000051258001212800001280000626400000152002720046200503228001220800002024000020050200501116002110910101600001000001003911321722221171620047231160000102005120047200512005120053
160024200501500090045258001212800001280000626400000152003320050200523228001220800002024000020050200461116002110910101600001000001004311211724221191820049215160000102005120051200512004720051
160024200501500000051258001212800001280000626400001152002720052200523228001220800002024000020050200501116002110910101600001000001004811612226221192020045215160000102005120051200532004920049
160024200501500000051258001212800001280000626400000152003320050200463228001220800002024000020052200501116002110910101600001000001004511621724221181720043230160000102005120051200512005320047
160024200501500000045258001212800001280000626400000152003120046200463228001220800002024000020046200501116002110910101600001000001004511321924321191820047230160000102005120047200472005320053

Test 6: throughput

Count: 12

Code:

  sqdmlal v0.2d, v12.2s, v13.s[1]
  sqdmlal v1.2d, v12.2s, v13.s[1]
  sqdmlal v2.2d, v12.2s, v13.s[1]
  sqdmlal v3.2d, v12.2s, v13.s[1]
  sqdmlal v4.2d, v12.2s, v13.s[1]
  sqdmlal v5.2d, v12.2s, v13.s[1]
  sqdmlal v6.2d, v12.2s, v13.s[1]
  sqdmlal v7.2d, v12.2s, v13.s[1]
  sqdmlal v8.2d, v12.2s, v13.s[1]
  sqdmlal v9.2d, v12.2s, v13.s[1]
  sqdmlal v10.2d, v12.2s, v13.s[1]
  sqdmlal v11.2d, v12.2s, v13.s[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2503

retire (01)cycle (02)030b181e1f373a3f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa1a8a9acc2cdcfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
12020430535225000018061025120100100120000100120000500990000131729317483003914973314997120100200120000200360000317483003911120201100991001001200001000000007610116113003601200001003174930040317493004031749
1202043174822500000041671325120118100120018100120000500960000130021317483003914973814997120100200120000200360000300393004011120201100991001001200001000000007610116113174501200001003004031749300403174930040
120204300392380015000410251201001001200001001200005004399524130020317483003916653315000120100200120000200360000300393003911120201100991001001200001000000007610116113003601200001003004030041300403174930047
120204300392380008854041671325120118100120018100120000500960000130021300403003914973314997120100200120000200360000317483003911120201100991001001200001000000007610116123174501200001003004031749300403174930536
12020431748225000018042671325120118100120215120121256611182396813034331003306741531938156831212622001214442023628623107130727911202011009910010012000010021242484076382992230966231200001003143831328315123019831666
120204311032271893661624115923080180120961120120640122121151610213948413022630948309711583237161521210722001209472043625593045530676911202011009910010012000010000022280077562105123132801200001003004130040300403004030041
12020430039225000000178671325120118100120018100120000500960000130020317483003914973314997120100200120000200360000300393174811120201100991001001200001000000007610116113003601200001003174930040317493004031749
12020431748225000018061025120100100120000100120000500990000130020300393174816653315000120100200120000200360000300393174811120201100991001001200001000000007610116113174501200001003004031749300403004130040
1202043003923800000041671325120100100120018100120000500960000130020300393174816653314997120100200120000200360000300393174811120201100991001001200001000000007610116113003601200001003174930040300413004030040
1202043003922500000041025120100100120000100120000500960000131729317483003914973314997120100200120000200360000300393174811120201100991001001200001000000007610116113174501200001003004031749300403174930040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01)cycle (02)03080a1e373f4e51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)5f60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa8acc5cfd5d6ddinst fetch restart (de)e0ea? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
120024312752250148184002512001110120000101200005096000041300203003930039150693150191200102012000020360000300393003911120021109101012000010000752091669300360120000103004130041300413004130952
1200243092222500006102512001010120000101200005099000061309033003930039149963150191200102012000020360000300393092211120021109101012000010000752091696300360120000103004030040317493004030040
1200243003922500004002512001010120000101200005096000061300203003930039149963159021200102012000020360000300393003911120021109101012000010000752091695300370120000103004130041300413175130923
1200243003922500004002512001010120000101200005096000071300203003930039149963150201200102012000020360000300403003911120021109101012000010000752091659300360120000103004030040300403004030040
1200243003922500004002512001010120000101200005096000081300203003930039149963159021200102012000020360000300393003911120021109101012000010000752051699300360120000103004030040300403004030040
1200243003922500004002512001010120000101200005096000091300203004030039149963150191200102012000020360000300393003911120021109101012000010000752061695300370120000103004030040300403004030040
12002430039225000040025120010101200001012000050960000101300203003930039149963150191200102012000020360000300393003911120021109101012000010000752091610103003620120000103004030040300403004030040
12002430039225006061025120010101200001012000050960000813002030039300391499631501912001020120000203600003003930039111200211091010120000100007520101695300360120000103004030040300403004030040
1200243003922500004067132512001010120000101200005096000081300203003930039149963150191200102012000020360000300393003911120021109101012000010000752071699300360120000103004030040300403004030040
1200243003922500004002512001010120000101200005096000091317293003930039149963150191200102012000020360000300393003911120021109101012000010000752051695300360120000103004030040300403004030040