Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

MLA (vector, 8H)

Test 1: uops

Code:

  mla v0.8h, v1.8h, v2.8h

Initialization (not part of the measured code):

  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule simd uop (54) | dispatch simd uop (57) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map simd uop (7e) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
10043037220006125482510001000100039831313018303730372415328951000100030003037303711100110001073216112630100030383038303830383038
10043037220006125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037220006125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037230006125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
1004303722019103254825100010001000398313030183037303724153289510001000300030373037111001100003673116112630100030383038303830383038
10043037230006125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037230006125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037230006125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037220006125482510001000100039831303018303730372415328951000100030003037303711100110000373116112630100030383038303830853038
100430372300061254825100010001000398313030183037303724153289510001000300030373037111001100001273116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  mla v0.8h, v1.8h, v2.8h

Initialization (not part of the measured code):

  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102043003722500006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710121622296340100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710021622296340100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710021622296340100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710021623296340100001003003830038300383003830038
1020430037225000015629548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710121622296340100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710121622296340100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710121622296340100001003003830038300383003830038
102043003722400016129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000781121622296340100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000710121622296340100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000000712121622296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372250000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000000640316222963010000103003830038300383003830038
10024300372250015688612954825100101010000101000050427731330018300373003728287328767100102410000203000030037300371110021109101010000100000000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000000640216222963010000103003830038300383003830038
100243003722500001242954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000030640216222963010000103003830038300383003830038
10024300372250115610427242949416110026121004810110436142863903027030406303222831634288801075720110032033471303663036681100211091010100001022002192832746216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100020000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100000000640216222963010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  mla v0.8h, v0.8h, v1.8h

Initialization (not part of the measured code):

  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102043003722500003660061295482510100100100001001000050042773133001830037300372826503287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
1020430037225000029400119295482510100100100001001000050042773133001830037300372826503287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722500003660061295482510100100100001001000050042773133001830037300372826503287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722500003270061295482510100100100001001000050042773133001830037300372826503287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722500003210061295482510100100100001001000050042773133001830037300372826503287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
1020430037225000000061295482510100100100001001000050042773133001830037300372826503287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
102043003722500003330061295482510100100100001001000050042773133001830037300372826503287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
1020430037225000030061295482510100100100001001000050042773133001830037300372826503287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038
1020430037225000033300612954825101001001000010010000500427731330018300373003728265732874510100200100002003000030037300371110201100991001001000010000000033071011611296340100001003003830038300383003830038
1020430037225000000061295482510100100100001001000050042773133001830037300372826503287451010020010000200300003003730037111020110099100100100001000000000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037225007206129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
100243003722500006129548251001010100001010148504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
10024300372250019506129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
10024300372250027606129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
100243003722500006129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
100243003722500237063129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
1002430037225000072629548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630010000103003830038300383003830038
100243003722500006129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100000006402162229630110000103003830038300383003830038
1002430037225001206129494138100671710048181104365428545503023430180303672832233289001090724109892632922304153031831100211091010100001002101947807882642229740410000103037130227302753037030370
10024303692257694561640662949465100731610056131104394428681213027030369303692831741288991107420111502033441303683040471100211091010100001022021954027873734429702310000103036730132304193036830371

Test 4: Latency 1->3

Code:

  mla v0.8h, v1.8h, v0.8h

Initialization (not part of the measured code):

  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 18 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020430037225002461295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296910100001003003830038300383003830038
10204300372250032161295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
102043003722500061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
102043003722500061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
1020430037225005161295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
10204300372250039361295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
10204300372250029461295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
102043003722500061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830082
10204300372240022861295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038
10204300372240042361295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372250061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
10024300372250061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
10024300372250061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
10024300372250061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
100243003722501561295482510010101000010101485042773133001830037300372828782876710010201000020300003003730037111002110910101000010000640224222963010000103003830038300383003830038
10024300372240061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
10024300372250061295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038
10024300372250061295392510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383013230038
1002430037225116261295482510010101000010100005042773133001830037300372828732876710010201016120300003003730037111002110910101000010400640216222963010000103003830038300383003830038
100243003722403661295482510010101000010100005042773133001830037300372828732876710010201000020300003003730037111002110910101000010000640216222963010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  mla v0.8h, v8.8h, v9.8h
  movi v1.16b, 0
  mla v1.8h, v8.8h, v9.8h
  movi v2.16b, 0
  mla v2.8h, v8.8h, v9.8h
  movi v3.16b, 0
  mla v3.8h, v8.8h, v9.8h
  movi v4.16b, 0
  mla v4.8h, v8.8h, v9.8h
  movi v5.16b, 0
  mla v5.8h, v8.8h, v9.8h
  movi v6.16b, 0
  mla v6.8h, v8.8h, v9.8h
  movi v7.16b, 0
  mla v7.8h, v8.8h, v9.8h

Initialization (not part of the measured code):

  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020420091150192925801161008001610080028500640196120045200652009006128012820080028200240084200652006511160201100991001001600001000011110119116102006201600001002006620066200662006620066
16020420065150002925801161008001610080028500640196120045200652006506128012820080028200240084200652006511160201100991001001600001000011110119116112006201600001002006620066200662006620066
16020420065151002925801161008001610080028500640196120045200652006506128012820080028200240084200652006511160201100991001001600001000011110119016112006201600001002006620066200662006620066
16020420065150002925801161008001610080028500640196120045200652006506128012820080028200240084200652006511160201100991001001600001000011110119016012006201600001002006620066200662006620066
16020420065150002925801161008001610080028500640196120045200652006506128012820080028200240084200652006511160201100991001001600001000011110119016002006201600001002006620066200662006620066
16020420065150002925801161008001610080028500640196120045200652006506128012820080028200240084200652006511160201100991001001600001000011110119016012006201600001002006620066200662006620066
16020420065150002925801161008001610080028500640196120045200652006506128012820080028200240084200652006511160201100991001001600001000011110119116002006201600001002006620066200662006620066
160204200651500182925801161008001610080028500640196120045200652006506128012820080028200240084200652006511160201100991001001600001000011110119116102006201600001002006620066200662006620066
16020420065150002925801161008001610080028500640196120045200652006506128012820080028200240084200652006511160201100991001001600001000011110119116012006201600001002006620066200662006620066
16020420065151002925801161008001610080028500640196120045200652006506128012820080028200240084200652006511160201100991001001600001000011110119116112006201600001002006620066200662006620066

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2507

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | 1e | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160024200901500001005727800121280000128000062640000012200342005320053322800122080000202400002005320053111600211091010160000100010061511147271114038200502211160000102005420054200542005420054
160024200531511101004527800121280000128000062640000112200342005320053322800122080000202400002005320053111600211091010160000100010060511139271113838200502211160000102005420054200542015820110
160024200531501200004527800121280000128000062640000112200342005320053322800122080000202400002005320053111600211091010160000100010046821135271113736200502211160000102005420054200542005420054
160024200531501001004527800121280000128000062640000115200342005320053322800122080000202400002005320053111600211091010160000100010060821123271113738200502211160000102005420054200542005420054
160024200531551100014527800121280000128000062640000115200342005320053322800122080000202400002005320053111600211091010160000100010061821138271114140200502211160000102005420054200542005420054
16002420053150120221125927800121280000128000062640000115200342005320053322800122080000202400002005320053111600211091010160000100010062821127271112638200502211160000102005420054200542005420054
160024200531501102006927800121280000128000062640000115200342005320053365800122080000202400002005320053111600211091010160000100010063821133271113440200502211160000102005420054200542015320054
1600242006215013030175298001212800001280000626400001152004320053200623228001220800002024000020062200621116002110910101600001000100601131139363224039200592412160000102006320063200632005420054
16002420062150120318175298001212800001280000626400000152004320062200623228001220800002024000020053200621116002110910101600001000100541131140361223535200592412160000102005420063200632006320063
1600242005315011030175278001212800001280000626400000152004320062200533228001220800002024000020062200621116002110910101600001000100631122126363122840200592412160000102006320054200632006320063

Test 6: throughput

Count: 16

Code:

  mla v0.8h, v16.8h, v17.8h
  mla v1.8h, v16.8h, v17.8h
  mla v2.8h, v16.8h, v17.8h
  mla v3.8h, v16.8h, v17.8h
  mla v4.8h, v16.8h, v17.8h
  mla v5.8h, v16.8h, v17.8h
  mla v6.8h, v16.8h, v17.8h
  mla v7.8h, v16.8h, v17.8h
  mla v8.8h, v16.8h, v17.8h
  mla v9.8h, v16.8h, v17.8h
  mla v10.8h, v16.8h, v17.8h
  mla v11.8h, v16.8h, v17.8h
  mla v12.8h, v16.8h, v17.8h
  mla v13.8h, v16.8h, v17.8h
  mla v14.8h, v16.8h, v17.8h
  mla v15.8h, v16.8h, v17.8h

Initialization (not part of the measured code):

  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire uop (01) | cycle (02) | 03 | 1e | 37 | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020440060299004225160100100160000100160000500128000040020400394004819973320006160100200160000200480000400394004811160201100991001001600001000052021011015911400361600001004005040050400494004940349
160204400393000175025160117100160000100160000500239899940021400394004019973319997160100200160000200480000400394004011160201100991001001600001000014701011011611400371600001004004040040400404004940040
160204400403020042251601001001600001001600005002398999400204004840039199733199981601002001600002004800004004840039111602011009910010016000010000001011011611400361600001004004040040400404004940040
16020440040300005025160100100160001100160000500239902740020400394004819973319997160100200160000200480000400394003911160201100991001001600001000013201011011611400361600001004004940040400414004040049
1602044003930001741251601171001600171001600005002399055400294003940039199873200061601002001600002004800004003940040111602011009910010016000010000001011011611400361600001004004040040400494004040040
160204400403000174125160101100160000100160000500239899940030400404004819973320007160100200160000200480000400404003911160201100991001001600001000014701011011611400361600001004004040040400404004040040
16020440039300004125160100100160000100160000500131999840029400484003919973319997160100200160000200480000400394004811160201100991001001600001000015901011011611400451600001004004040040400494004040049
1602044003929900412516011710016001710016000050013659194002940048400391997331999716010020016000020048000040039400481116020110099100100160000100001801011011611400371600001004004040040400414004040049
16020440048300017525251601001001600001001600005001280000400294003940040199733200061601002001600002004800004004840039111602011009910010016000010000001011011611400701600001004004040040400404004040040
160204400393000174125160100100160001100160000500128000040029400394003919973320006160100200160000200480000400404004911160201100991001001600001000014401011011611400361600001004004040040400494004040041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2502

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 37 | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d cache writeback (a8) | ac | c2 | branch mispred nonspec (cb) | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600244008230000000001200460251600101016000010160000501280000110400204003940039199963200281600102016000020480000400394003911160021109101016000010020001002281133162112427400360206160000104004040040400404004040040
160024400393000000000000460251600101016000010160000501280000115400304003940048199963200191600102016000020480000400494003911160021109101016000010000001002284122612112325400460206160000104004040050400504004040040
1600244010230000000000018460251600101016000010160000501280000115400204003940039199963200291600102016000020480000400394003911160021109101016000010000001002283123162112324400360207160000104004040040400404004040040
160024400393000000000000460251600101016000010160000501280000115400204003940039199963200191600102016000020480000400394003911160021109101016000010000001002284123162112323400360206160000104004040040400404004040040
160024400392990000000000460251600101016000010160000501280000110400204003940039199963200191600102016000020480000400394003911160021109101016000010000001002284125162112324400460209160000104004040040400404005040040
16002440039300000000000056025160010101600181016000050128000000040020400494004919996320019160010201600002048000040039400491116002110910101600001000000100241152231622122234003604014160000104004040040400404004040040
160024400392990000000000520251600281016000010160000501280000015400704003940049199963200191600102016000020480000400394003911160021109101016000010000001002281122162112322400360206160000104004040040400404004040040
160024400393000000000000460251600101016001810160000502398999110400204003940039199963200191600102016000020480000400394003911160021109101016000010000001002284123162112323400360389160000104005040040400414004040040
160024400392990000000000880251600281016000010160000501280000100400204003940039199963200191600102016000020480000400394003911160021109101016000010000001002284122162112322400360206160000104004040040400404005040050
1600244003929900000000018460251600101016000010160000501280000115400204003940039199963200191600102016000020480000400394003911160021109101016000010000001002231124162112222400360206160000104004040041400404004040040