Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

UDOT (by element, 16B)

Test 1: uops

Code:

  udot v0.4s, v1.16b, v2.4b[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
100430372400612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372400612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372300612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037240252612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372300612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372300612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372300612548251000100010003983133018303730372415328951000100030003037303711100110006373116112630100030383038303830383038
100430372300612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372300612548251000100010003983133018303730372415328951000100030003037303711100110001073116112630100030383038303830383038
100430372300612548251000100010003983133018303730372415328951000100030003037303711100110000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  udot v0.4s, v1.16b, v2.4b[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a8 | a9 | ac | c5 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204300372330061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071002162229634100001003003830038300383003830038
10204300372330061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071002162229634100001003003830038300383003830038
10204300372330084295482510100100100161001000050042773130300183003730084282653287451010020010000200300003003730037111020110099100100100001000140071002162229634100001003003830038300383003830038
102043003723300103295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071002162229634100001003003830038300853003830038
102043013123312082295482510100100100001001000050042773130300183008430085282653287451010020010000200300003003730037111020110099100100100001000000071002162229634100001003003830038300383003830038
10204300372330061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071002162329634100001003003830038300383003830038
10204300372320061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000100071002162329634100001003003830038300383003830038
10204300372330061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071002163229634100001003003830038300383003830038
10204300372410061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000206071012162229634100001003003830038300383003830038
10204300372330061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071012163229634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | a9 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | eb | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037233000000111295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000006640316332963000010000103003830038300383003830038
100243003723200000061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640316332963000010000103003830038300383003830038
100243003723200000061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640316332963000010000103003830038300383003830038
100243003723300000082295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000640316332963001010000103003830038300383003830038
100243003723300000084295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000100640316332963000010000103003830038300383003830038
1002430037232000036061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000003640316432963000010000103003830038300383003830038
100243003723300000061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000640316332963000010000103003830038300383003830038
100243003723300000084295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640316332963000010000103003830038300383003830038
10024300372320000002007295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000000640316342963000010000103003830038300383003830038
100243003723300000061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000640316332963000010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  udot v0.4s, v0.16b, v1.4b[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204300372330061295482510100100100001001000050042773130300183003730037282726287401010020010008200300243003730037111020110099100100100001000011171801600296460100001003003830038300383003830038
1020430037232012103295482510100100100001001000050042773130300183003730037282727287401010020010008200300243003730037111020110099100100100001000011171801600296460100001003003830038300383003830038
10204300372330061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071011611296340100001003003830038300383003830038
10204300372330061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071011611296340100001003003830038300383003830038
102043003723300726295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071011611296340100001003003830038300383003830038
10204300372330082295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071011611296340100001003003830038300383003830038
10204300372330061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071011611296340100001003003830038300383003830038
102043003723300103295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000300071011611296340100001003003830038300383003830038
10204300372320061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071031611296340100001003003830038300383003830038
10204300372332162103295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037233000000842954825100101010000101000050427731313001830037300372828732876710609201016820300003003730037111002110910101000010000640216222963010000103003830038300383003830038
10024300372330000120612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
10024300372320000120612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
100243003723300001590612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037232000000612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
100243003723200001201262954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037232000000612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
10024300372320000002022954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
1002430037233000000612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010030640216222966810000103022730038300383003830038
1002430037233001000612954825100101010000101000050427731303001830037300372828732876710010201000020300003003730037111002110910101000010000640316222963010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  udot v0.4s, v1.16b, v0.4b[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204300372330612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
10204300372330612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
10204300372330612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
10204300372410612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
10204300372410612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
10204300372330882954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
10204300372330612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
10204300372320612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
10204300372330612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100007101161129634100001003003830038300383003830038
10204300372330612954825101001001000010010000500427731330018300373003728265328745101002001000020030000300373003711102011009910010010000100137101161129634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | cf | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037233000012006129530251001010100001010000504277313130018030037300372828732876710010201000020300003003730037111002110910101000010000000064002162229630010000103003830038300383003830038
1002430037233000000010329548251001010100001010000504277313030018030037300372828732876710010201000020300003003730037111002110910101000010000000064002162229630010000103003830038300383003830038
100243003723200000006129548251001010100001010000504277313130018030037300372828732876710010201000020300003003730037111002110910101000010000000064002162229630010000103003830038300383003830038
100243003723300000006129548251001010100001010000504277313130018030037300372828732876710010201000020300003003730037111002110910101000010000000064002162229630010000103003830038300383003830038
1002430037233000000063129548251001010100001010000504277313130018030037300372828732876710010201000020300003003730037111002110910101000010000000064002162229630010000103003830038300383003830038
100243003723200000006129548251001010100001010000504277313130018030037300372828732876710010201000020300003003730037111002110910101000010000100064002162229630010000103003830038300383003830038
100243003723300000006129548251001010100001010000504277313130018030037300372828732876710010201000020300003003730037111002110910101000010000000064002162229630010000103003830038300383003830038
100243003723300000006129548251001010100001010000504277313130018030037300372828732876710010201000020300003003730037111002110910101000010000000064002162229630010000103003830038300383003830038
1002430037233000000010329548251001010100001010000504277313030018030037300372828732876710010201000020300003003730037111002110910101000010000000064002162229630010000103003830038300383003830038
100243003723300000006129548251001010100001010000504277313030018030037300372828732876710010201000020300003003730037111002110910101000010000000064002162229630010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  udot v0.4s, v8.16b, v9.4b[1]
  movi v1.16b, 0
  udot v1.4s, v8.16b, v9.4b[1]
  movi v2.16b, 0
  udot v2.4s, v8.16b, v9.4b[1]
  movi v3.16b, 0
  udot v3.4s, v8.16b, v9.4b[1]
  movi v4.16b, 0
  udot v4.4s, v8.16b, v9.4b[1]
  movi v5.16b, 0
  udot v5.4s, v8.16b, v9.4b[1]
  movi v6.16b, 0
  udot v6.4s, v8.16b, v9.4b[1]
  movi v7.16b, 0
  udot v7.4s, v8.16b, v9.4b[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0a | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | branch mispredict (cb) | cf | d5 | d6 | da | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204200891560000000300392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000000000101112160112006101600001002006520065200652006520065
1602042006415500000000004192580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000000000101111160112006101600001002006520065200652006520065
160204200641550000000000392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000000000101111160112006101600001002006520065200652006520065
160204200641560000000000392580100100800001008000050064000012004520064200643228010020080000200240000200642006411160201100991001001600001000000000101111160112006101600001002006520065200652006520065
160204200641550000000000392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000000000101111160112006101600001002006520065200652006520065
1602042006415600000002700392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000000000101111160112006101600001002006520065200652006520065
160204200641550000000000812580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000000000101111160112006101600001002006520065200652006520065
160204200641550000000000392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000000000101111160112006101600001002006520065200652006520065
160204200641550000000000392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000000000101111160112006101600001002006520065200652006520065
160204200641550000000000392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000000000101111160112006101600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire (01) | cycle (02) | 03 | 0a | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024200741550045278001212800001280000626400002110200322005320051322800122080000202400002005120051111600211091010160000100100251321113421174200482201160000102006120061200752005920052
16002420051155008029800121280000128000062640000311020032200512005132280012208000020240000200512005111160021109101016000010010028135123421124200482401160000102005220061200522005220052
16002420051155004527800121280000128000062640000211020032201212013132280012208000020240000200512005111160021109101016000010010030136142521146200572201160000102005220052200522005220052
16002420051156004527800121280000128000062640000311020032200512005132280012208000020240000200512005111160021109101016000010010025135142522142200482201160000102005220052200612005220052
160024200601550052027800121280000128000062640000311020032200512005132280012208000020240000200512005111160021109101016000010010027135142521142200482201160000102005220052200522005220052
16002420051155004527800121280000128000062640000311020039200512005132280012208000020240000200512005111160021109101016000010010027258564883564200482201160000102005220052200522005320052
16002420051156004527800121280000128000062640000311020041200512006032280012208000020240000200512006011160021109101016000010010030165223421124200482402160000102006120052200612005220052
16002420051155009329800121280000128000062640000311020041200602006032280012208000020240000200602006011160021109101016000010010028165243442224200572402160000102006120061200522006120061
160024200601550051298001212800001280000626400002110200322006020051322800122080000202400002006020051111600211091010160000100100281631043442224200572202160000102005320061200612006120061
16002420060155005127800121280000128000062640000311020032200602006032280012208000020240000200602006011160021109101016000010010028166243442242200622201160000102006120052200522005220061

Test 6: throughput

Count: 16

Code:

  udot v0.4s, v16.16b, v17.4b[1]
  udot v1.4s, v16.16b, v17.4b[1]
  udot v2.4s, v16.16b, v17.4b[1]
  udot v3.4s, v16.16b, v17.4b[1]
  udot v4.4s, v16.16b, v17.4b[1]
  udot v5.4s, v16.16b, v17.4b[1]
  udot v6.4s, v16.16b, v17.4b[1]
  udot v7.4s, v16.16b, v17.4b[1]
  udot v8.4s, v16.16b, v17.4b[1]
  udot v9.4s, v16.16b, v17.4b[1]
  udot v10.4s, v16.16b, v17.4b[1]
  udot v11.4s, v16.16b, v17.4b[1]
  udot v12.4s, v16.16b, v17.4b[1]
  udot v13.4s, v16.16b, v17.4b[1]
  udot v14.4s, v16.16b, v17.4b[1]
  udot v15.4s, v16.16b, v17.4b[1]
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire (01) | cycle (02) | 03 | 09 | 19 | 1e | 37 | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | ac | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204400593100001041251601001001600171001600005001280000040020040039400481997331999816010020016000020048000040039400391116020110099100100160000100050010110216224003601600001004004940040400404004940040
1602044003931100000246251601001001600001001600005001280000040029040039400391997331999716010020016000020048000040039400481116020110099100100160000100010010110216224003601600001004004940049400494004940284
160204400393110000050251601001001600001001600005001280000040020040048400481997332000616010020016000020048000040039400481116020110099100100160000100010010110216224004501600001004004040049400494009040040
160204400483100000041251601001001600001001600005002398999140029040039400391997332000616010020016000020048000040039400481116020110099100100160000100000010110216224004501600001004004040040400494004040049
1602044003931000017041251601001001600001001600005001280000140029040039400481997331999716010020016000020048000040048400391116020110099100100160000100003010110216224003601600001004004040049400404004040040
160204400393110000050251601001001600171001600005001280000140020040048400481997332000616010020016000020048000040039400401116020110099100100160000100020010110216224003601600001004004040049400404004040040
160204400393100000050251601171001600001001600005001280000140029040039400481997331999716010020016000020048000040039400481116020110099100100160000100000010110216224004501600001004004040040400494004040049
160204400483100000041251601011001600171001600005001280000140020040039400481997331999716010020016000020048000040048400391116020110099100100160000100040010110216224004501600001004004040049400404004040040
1602044003931000917050251601171001600001001600005001280000140020040048400481997331999716010020016000020048000040048400391116020110099100100160000100000010110216224004501600001004004040040400404005340040
1602044004831000017041251601171001600001001600005001280000140029040039400391997331999716010020016000020048000040040400711116020110099100100160000100000010110216224004501600001004004940049400404004040040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2502

retire (01) | cycle (02) | 03 | 08 | 0b | 1e | 1f | 37 | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | a9 | ac | c5 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002440048310000004625160010101600171016000050131999911400290400394004819996320028160010201600002048000040048400391116002110910101600001000003001002231111162111111400360206160000104004940040400494004040049
1600244004931000000552516013812160097101600005023989991140029040039400481999632002816001020160000204800004004840039111600211091010160000100000270100223111016339921112400450156160000104004040041400494004940040
1600244003931000001746251600101016000310160000502438865114002004003940039199963200191600102016000020480000400404003911160021109101016000010000060100223111016412119400360155160000104005040040400494004040040
160024400393110000052251600271016000210160000502398999114002004003940039199963200191600102016000020480000400394003911160021109101016000010000016801002231191621198400360156160000104004040040400404004040040
160024400483100000046251600101016003410160000501280000114002004003940039199963200191600102016000020480000400394003911160021109101016000010000017101002231110162111010400360155160000104004140050400404004040040
1600244003931000001755251600101016000210160000502398999114002004003940039199963200191600102016000020480000400394003911160021109101016000010000010201002231191621198400360155160000104004940040400404004040041
16002440039310000004625160027101600151016000050239899911400200400394003919996320019160010201600002048000040039400391116002110910101600001000000010022311816282911011400450155160000104005040041400404004040040
160024400393100000153251600101016005710160000502398999114003004003940039199963200191600102016000020480000400394003911160021109101016000010001060100226229162111194003603010160000104005040041400494005040040
16002440048311000005225160010101600351016000050239899911400200400394003919996320019160010201600002048000040049400391116002110910101600001000001800100223119162111110400360157160000104004140049400504004040040
16002440039310000004625160011101600021016000050128000011400200400394003919996320029160010201600002048000040039400391116002110910101600001000001290100246128162111111400360305160000104004140049400404004040040