Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

SDOT (by element, 8B)

Test 1: uops

Code:

  sdot v0.2s, v1.8b, v2.4b[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372208425482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  sdot v0.2s, v1.8b, v2.4b[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 19 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204300372250004162954825101001001000010010000500427731303001830037300372827262874010100200100082003002430037300371110201100991001001000010000111718001600296460100001003003830038300383003830038
10204300372250001912954825101001001000010010000500427731303001830037300372827262874110100200100082003002430037300371110201100991001001000010000111717001600296470100001003003830038300383003830038
1020430037225000842954825101001001000010010000500427731303001830037300372827262874110100200100082003002430037300371110201100991001001000010000111717001600296460100001003003830038300383003830038
10204300372250002512954825101001001000010010000500427731303001830037300372827262874010100200100082003002430037300371110201100991001001000010000111717001600296460100001003003830038300383003830038
10204300372241001032953925101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000710121622296340100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000710121622296340100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731303001830037300372828732874510100200100002003000030037300371110201100991001001000010000000710131622296340100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000710121622296340100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000710121622296340100001003003830038300383003830038
10204300372250001032954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000710131622296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 07 | 0a | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372251101130295482510010101000010100005042773133001803003730037282873287671001020100002030000300373003711100211091010100001000642716862963010000103003830038300383003830038
10024300372251191214295482510010101000010100005042773133001803003730037282873287671001020100002030000300373003711100211091010100001000642816992963010000103003830038300383003830038
10024300372251101128295482510010101000010100005042773133001803003730037282873287671001020100002030000300373003711100211091010100001000644916992963010000103003830038300383003830038
10024300372251101107295482510010101000010100005042773133001803003730037282873287671001020100002030000300373003711100211091010100001000644916992963010000103003830038300383003830038
10024300372241101258295482510010101000010100005042773133001803003730037282873287671001020100002030000300373003711100211091010100001000642616792963010000103003830038300383003830038
10024300372251101816295362510010101000010100005042773133001803003730037282873287671001020100002030000300373008411100211091010100001000642916992963010000103003830038300383003830038
10024300372251101652954825100101010000101000050427731330018030037300372828732876710010201000020300003003730037111002110910101000010006449169102963010000103003830038300383003830038
10024300372251101751295482510010101000010100005042773133001803003730037282873287671001020100002030000300373003711100211091010100001000642716682963010000103003830038300383003830038
100243003722511015372954825100101010000101000050427731330018030037300372828732876710010201000020300003003730037111002110910101000010006428161082963010000103003830038300383003830038
1002430037225110131829548251001010100001010000504277313300180300373003728287328767100102010000203000030037300371110021109101010000100064210161092963010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  sdot v0.2s, v0.8b, v1.4b[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | a9 | aa | ac | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
10204300372250006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007102162229634100001003003830038300383003830038
102043003722500021629548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000007102162229634100001003008630173300383003830038
102043003722500072629548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007102162229634100001003003830038300383003830038
10204300372250006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007102162229634100001003003830038300383003830038
102043003722500072629548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000017102162229634100001003003830038300383003830038
1020430037225000536295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000001507102162229634100001003003830038300383003830038
10204300372250006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007102162229634100001003003830038300383003830038
10204300372250006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000007102162229634100001003003830038300383003830038
102043003722500053629548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007102162229634100001003003830038300383003830038
10204300372250006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007102162229634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
100243003722512010329548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001010640316332963010000103003830038300383003830038
1002430037225282014529548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640316332963010000103003830038300383003830038
100243003722516506129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640316332985710000103003830038300383003830038
1002430037224006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640316332963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640316332963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640316332963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640316332963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640316332963010000103003830038300383003830038
1002430037225006129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640316332963010000103003830038300383003830038
10024300372252106129548251001010100001010000504277313300183003730037282873287671001020100002030000300373003711100211091010100001000640316332963010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  sdot v0.2s, v1.8b, v0.4b[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 0b | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | ac | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1020430037225000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071021622296340100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071021622296340100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071021622296340100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071021622296340100001003003830038300383003830038
1020430037224000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071021622296340100001003003830038300383003830038
1020430037224000822954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000271021622296340100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071021622296340100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731303001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071021622296340100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071021622296340100001003003830038300383003830038
1020430037225000612954825101001001000010010000500427731313001830037300372826532874510100200100002003000030037300371110201100991001001000010000000071021622296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire (01) | cycle (02) | 03 | 08 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037225003361295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000006405162229630010000103003830038300383003830038
100243003722500061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001010006402162229630010000103003830038300383003830038
100243003722500061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000006402162229630010000103003830038300383003830038
100243003722500061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000006401162229630010000103003830038300383003830038
1002430037225000796295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000006402162229630010000103003830038300383003830038
100243003722500061295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001000006402162229630010000103003830038300383003830038
100243003722500061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000006402162229630010000103003830038300383003830038
100243003722500061295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001003006402162229630010000103003830038300383003830038
10024300372250018536295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000006402162229630010000103003830038300383003830038
10024300372250024578295482510010101000010100005042773130300183003730037282873287671001020100002030000300373003711100211091010100001010006402162229630010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  sdot v0.2s, v8.8b, v9.4b[1]
  movi v1.16b, 0
  sdot v1.2s, v8.8b, v9.4b[1]
  movi v2.16b, 0
  sdot v2.2s, v8.8b, v9.4b[1]
  movi v3.16b, 0
  sdot v3.2s, v8.8b, v9.4b[1]
  movi v4.16b, 0
  sdot v4.2s, v8.8b, v9.4b[1]
  movi v5.16b, 0
  sdot v5.2s, v8.8b, v9.4b[1]
  movi v6.16b, 0
  sdot v6.2s, v8.8b, v9.4b[1]
  movi v7.16b, 0
  sdot v7.2s, v8.8b, v9.4b[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602042006515003925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010001011221612200611600001002006520065200652006520065
1602042006415003925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010001011111611200611600001002006520065200652006520065
16020420064150273925801001008000010080000500640916120045200642006432280100200800002002400002006420064111602011009910010016000010001011111611200611600001002006520065200652006520065
16020420064150303925802121008010810080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010001011111612200611600001002006520065200652006520065
16020420064150153925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010001011221611200611600001002006520065200652006520065
16020420064150153925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010001011111612200611600001002006520065200652006520065
1602042006415003925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010031011111612200611600001002006520065200652006520065
1602042006415003925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010001011111621200611600001002006520065200652006520065
1602042006415063925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010001011211611200611600001002006520065200652006520065
16020420064150051425801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010001011111612200611600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2517

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002420087150100000000452780012128000012800006264000011020032200512005132280012208000020240000200512006011160021109101016000010000000010032134162521143200482201160000102005220061200522005220052
160024200511550000000005127800121280000128000062640000101020032200512005132280012208000020240000200512005111160021109101016000010000000010028131162521156200482201160000102005220052200522006120052
160024200511500000000004529800121280000128000062640000101020032200512005132280012208000020240000200512005111160021109101016000010000000010028137142541165200482201160000102005220052200522006120061
1600242005115000000000051278001212800001280000626400001102004120051200513228001220800002024000020051200511116002110910101600001000000001002731152521155200482401160000102005220052200522005220052
16002420051150000000000452780012128000012800006264000010020032200512005132280012208000020240000200512005111160021109101016000010000000010031137162521164200482201160000102005220052200522005220052
16002420051150000000000452780012128000012800006264000010020032200512005132280012208000020240000200512005111160021109101016000010000000010027167162541174200482201160000102005220052200522005220052
160024200511500000000007227800121280000128000062640000001020032200512005132280012208000020240000200512005111160021109101016000010000000010029132162521166200482201160000102005220052200522005220052
16002420051150000000000552780012128000012800006264000011102003220051200513228001220800002024000020051200511116002110910101600001000000001002837142521164200482201160000102006120052200522005220052
160024201221500000000004527800121280000128000062640000111020041200512005132280012208000020240396200622005111160021109101016000010000303010030131142522144200482201160000102005220052200522005220061
160024200511510000000004527800121280000128000062640000101020041200512005132280012208000020240000200512005111160021109101016000010000000010029137142521155200482201160000102005220052200522005220052

Test 6: throughput

Count: 16

Code:

  sdot v0.2s, v16.8b, v17.4b[1]
  sdot v1.2s, v16.8b, v17.4b[1]
  sdot v2.2s, v16.8b, v17.4b[1]
  sdot v3.2s, v16.8b, v17.4b[1]
  sdot v4.2s, v16.8b, v17.4b[1]
  sdot v5.2s, v16.8b, v17.4b[1]
  sdot v6.2s, v16.8b, v17.4b[1]
  sdot v7.2s, v16.8b, v17.4b[1]
  sdot v8.2s, v16.8b, v17.4b[1]
  sdot v9.2s, v16.8b, v17.4b[1]
  sdot v10.2s, v16.8b, v17.4b[1]
  sdot v11.2s, v16.8b, v17.4b[1]
  sdot v12.2s, v16.8b, v17.4b[1]
  sdot v13.2s, v16.8b, v17.4b[1]
  sdot v14.2s, v16.8b, v17.4b[1]
  sdot v15.2s, v16.8b, v17.4b[1]
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire (01) | cycle (02) | 03 | 08 | 09 | 1e | 37 | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a8 | a9 | ac | c2 | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204400843000001502516010010016000010016000050023989991140020400394003919973320006160100200160000200480000402664004011160201100991001001600001000000330010110216114003601600001004004940072400494007240040
160204400393000001741251605801001600001001600005001280000014002940039400481997332000616010020016000020048000040048400391116020110099100100160000100000000010110116114003601600001004004040040400494004040040
16020440039300009083251601171001600001001600005001540100014002040039400391997332000616010020016000020048000040039400391116020110099100100160000100001404010110116114004501600001004004040040400404004040040
1602044004830000121741251601001001600171001600005002399027014002040048400711997332002916010020016000020048000040039400491116020110099100100160000100000000010110116114003601600001004004040040400494004040040
16020440039300000041251601001001600001001600005001280000114002040049400391997331999716010020016000020048000040040400391116020110099100100160000100000000010110116114003601600001004005040040400404005040040
16020440049300000142251601001001600001001600005001280000014002040039400711998031999716010020016000020048000040039400391116020110099100100160000100000000010110116114003601600001004004040040400494004040040
16020440040300000041251601001001600001001600005001320000014003040039400401997331999716010020016000020048000040039400391116020110099100100160000100000000010110116114003601600001004004040040400504004940049
16020440039300000041251601001001600001001600005001280000014002140039400481997332000716074320016000020048000040039400391116020110099100100160000100000000010110116114003601600001004011840040400404004040040
16020440039299009050251601001001600001001600005001280000014002040039400481997331999716010020016000020048000040039400391116020110099100100160000100000000010110116114003601600001004004040040400414004040050
16020440039300000050251601011001600171001600005001280000014002040039400391997331999716010020016000020048000040040400391116020110099100100160000100002021300101101161140046141600001004004040049400404004040040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire (01) | cycle (02) | 03 | 07 | 19 | 1e | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a8 | ac | c2 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600244010130000005602516002710160000101600005023989991140029040040400391999632001916001020160000204800004004040049111600211091010160000100000100223113164126640036409160000104004940040400494004040049
1600244004930001017610251600271016000010160000502398999014002004003940048199963200281600102016000020480000400484003911160021109101016000010000010024621461622254400364012160000104004040049400404004040049
16002440048300000062025160010101600001016000050239899911400290400484003919996320019160010201600002048000040039400391116002110910101600001000001002462151642155400454012160000104004940040400404007240049
16002440054300000071702516001010160017101600005053871880140029040048400391999632001916001020160000204800004003940048111600211091010160000100021601002462241642145400364012160000104004040049400404004040040
16002440049300100052025160027101600171016000050538718801400200400484007119996320019160010201600002048000040048400401116002110910101600001000001002462261642255400364018160000104004940040400494004040049
1600244005430000008202516002710160017101600005012800001140020040039400391999632001916001020160000204800004003940048111600211091010160000100000100246215164214540045406160000104007240040400724004040040
1600244012430000007302516002710160017101600005053871881140052040039400481999632001916001020160000204800004003940039111600211091010160000100000100223115162115540045407160000104004940040400404004040049
16002440048299000067025160010101600001016000050128000011400200400394004819996320019160010201600002048000040039400481116002110910101600001000720100223114162114540045206160000104004940041400494004040049
160024400483000001776202516001010160000101600005012800001140029040039400391999632001916001020160000204800004003940039111600211091010160000100000100223115162114540045208160000104004040049400404004040040
16002440045300000092002516001010160000101600005013199981140020040039400391999632002816001020160000204800004004840048111600211091010160000100000100223115162115540045208160000104004040040400984004140049