Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

SMLSL (by element, 4S)

Test 1: uops

Code:

  smlsl v0.4s, v1.4h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule simd uop (54) | dispatch simd uop (57) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map simd uop (7e) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
10043037220006125482510001000100039831313018303730372415328951000100030003037303711100110000073116312698100030383038303830383038
10043037230006125482510001000100039831303018303730372415328951000100030003037303711100110002073116112630100030383038303830383038
10043037220006125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037220006125482510001000100039831313018303730372415328951000100030003037303711100110000373116112630100030383038303830383038
10043037220006125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037230006125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037220006125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037230006125482510001000100039831303018303730372415328951000100030003037303711100110001073116112630100030383038303830383038
10043037220006125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
10043037230006125482510001000100039831303018303730372415328951000100030003037303711100110000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  smlsl v0.4s, v1.4h, v2.h[1]
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204300372250000000066295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000050300710021622296340100001003003830038300383003830038
10204300372250000000061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000010000710021622296340100001003003830038300383003830038
10204300372240000090082295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000030000710021623296340100001003003830038300383003830038
10204300372250000000061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000710021622296340100001003003830082300383003830038
10204300372250000000061295482510100100100001041000052242773133001830037300372826532874510100204100002003000030037300371110201100991001001000010000000000710131622296340100001003003830038300383003830038
10204300372250000000061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000710121622296340100001003003830038300383003830038
10204300372250000000094295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000710121622296340100001003003830038300383003830038
10204300372250000000061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000710121622296340100001003003830038300383003830038
10204300372250000000061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000000000710121622296340100001003003830038300383003830038
10204300372250000000061295482510100100100001001000050042773133001830037300852826532874510100200100002003000030037300371110201100991001001000010000000000710121622296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372250000612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000640316332963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000640316332963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000640316332963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000640316332963010000103003830038300383003830038
10024300372251110612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000640316332963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010030640316332963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010100640316332963010000103003830038300383003830038
100243003722500006129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000103200640316332963010000103003830038300383003830038
100243003722500006129548251001010100001010000504277313130018300373003728287242876710010201000020300003003730037111002110910101000010000640316332963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000640316332963010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  smlsl v0.4s, v0.4h, v1.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 19 | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020430037225000061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000007101161129634100001003003830038300383003830038
102043003722400001187295094510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000007101161129634100001003003830038300383003830038
1020430037225000061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000007101161129634100001003003830038300383003830038
1020430037225000061295482510100100100001001000050042773130300183003730037282653287451010020010000200305043008530037111020110099100100100001000000007101161129634100001003003830038300383003830038
10204300372250300321295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000007101161129634100001003003830038300383003830038
1020430037225000061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000007101161129634100001003003830038300383003830038
1020430037225000061295484410100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000007101161129634100001003003830038300383003830038
1020430037225000061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000007101161129634100001003003830038300383003830038
10204300372250012061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000007101161129634100001003003830218300383003830038
1020430037225000061295482510100100100001001000050042773131300183003730084282656287451010020010000200300003003730037111020110099100100100001004000007101161129634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372258229548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100640316222963010000103003830038300383003830038
100243003722547229548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100640216222963010000103008630121300863018030038
100243003722516629548251001010100001010000504277313030018300843003728287328767100102010000203000030037300371110021109101010000100640216222963010000103003830038300383003830038
100243003722518729548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100640216222963010000103003830038300383003830038
100243003722578929548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100640216222963010000103003830038300383003830038
10024300372256129548251001911100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100640216222963010000103003830038300383003830038
100243003722516629548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100640216222963010000103003830038300383003830038
10024300372258229548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000103640216222963010000103003830038300383003830038
100243003722410329548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100640216222963010000103003830038300383003830038
100243003722521029548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100640216222963010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  smlsl v0.4s, v1.4h, v0.h[1]
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204300372250010529548251010010010000100100005004277313130018300373003728265032874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250014529548251010010010000100100005004277313030018300373003728265032874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250012429548251010010010000100100005004277313030018300373003728265032874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250014529548251010010010000100100005004277313130018300373003728265032874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250012429548251010010010000100100005004277313130018300373003728265032874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10205300372250012429548251010010010000100100005004277313030018300373003728265332874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
1020430037225008229548251010010010000100100005004277313030018300373003728265032874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250016829548251010010010000100100005004277313030018300373003728265032874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372240010329548251010010010000100100005004277313030018300373003728265032874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038
10204300372250016629548251010010010000100100005004277313030018300373003728265032874510100200100002003000030037300371110201100991001001000010000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 19 | 1e | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372250002662954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000006441216121229630010000103003830038300383003830038
1002430037225000217802954825100101010000101000050427731313001830037300372828732876710010201016220305043008430085211002110910101000010000406671116111129630010000103008630038300383003830038
100243003722500026629530451001911100081010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100000064491613929630010000103003830038300383003830038
10024300372250002108295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000644111661129630010000103003830038300383003830038
10024300372250002662954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000006441116111229630010000103003830080300383003830038
1002430037225000266295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000644111612729630010000103003830038300383003830038
10024300372250002261295482510010101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000644616111129630010000103003830038300383003830038
10024300372250002662954825100211010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000006446169929630010000103003830038300383003830038
1002430037225000266295482510019101000010100005042773131300183003730037282873287671001020100002030000300373003711100211091010100001000000644111661129630010000103003830038300383003830038
10024300372250002662954825100101010000101000050427731313001830037300372828732876710010201000020300003003730037111002110910101000010000006441316121229630010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  smlsl v0.4s, v8.4h, v9.h[1]
  movi v1.16b, 0
  smlsl v1.4s, v8.4h, v9.h[1]
  movi v2.16b, 0
  smlsl v2.4s, v8.4h, v9.h[1]
  movi v3.16b, 0
  smlsl v3.4s, v8.4h, v9.h[1]
  movi v4.16b, 0
  smlsl v4.4s, v8.4h, v9.h[1]
  movi v5.16b, 0
  smlsl v5.4s, v8.4h, v9.h[1]
  movi v6.16b, 0
  smlsl v6.4s, v8.4h, v9.h[1]
  movi v7.16b, 0
  smlsl v7.4s, v8.4h, v9.h[1]
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602042006515000081258010010080000100800005006400001200452006420064322801002008000020024000020064200641116020110099100100160000100001011131611200611600001002006520132200652006520065
1602042006415100081258010010080000100800005006400001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
1602042006415000039258010010080000100800005006400001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
1602042006415000039258010010080000100800005006400001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
16020420064151000463258010010080000100800005006400001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
1602042006415000039258010010080000100800005006400001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
16020420064150001239258010010080000100800005006400001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
1602042006415000039258010010080000100800005006400001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
1602042006415000039258010010080000100800005006400001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065
16020420064150000771258010010080000100800005006400001200452006420064322801002008000020024000020064200641116020110099100100160000100001011111611200611600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600242007415004527800121280000128000062640000112003202005120051322800122080000202400002005120051111600211091010160000100010029311122521159202352201160000102005220052200522005220052
16002420051150045278001212800001280000626400001120032020051200513228001220800002024000020051200511116002110910101600001000100323111025211106202432201160000102005220052200522005220052
1600242005115001082780012128000012800006264000011200320200512005132280012208000020240000200512005111160021109101016000010001003231152521159202082201160000102005220052200522005220052
16002420060150015427800121280000128000062640000112003202005120051322800122080000202400002005120051111600211091010160000100010028311102521169202262201160000102005220052200522005220052
160024200511500458880012128000012800006264000011200320200512005132280012208000020240000200512005111160021109101016000010001002831192521199202182201160000102005220052200522005220052
1600242005115006827800121280000128000062640000112003202005120051322800122080000202400002005120051111600211091010160000100010032311102521159202112201160000102005220052200522005220052
1600242005115003932780012128000012800006264000011200320200512005132280012208000020240000200512005111160021109101016000010101003231153421195202042201160000102005220052200522005220052
160024200511500452980012128000012800006264000011200320200512005132280012208000020240000200512005111160021109101016000010001002831152521159202042201160000102005220052200522005220052
1600242005115003652780012128000012800006264000011200320200512005132280012208000020240000200512005111160021109101016000010001002831162521159202072201160000102005220052200522005220052
1600242005115001087680012128000012800006264000011200320200512005132280012208000020240000200512005111160021109101016000010001003231172521159202062201160000102005220052200522005220052

Test 6: throughput

Count: 12

Code:

  smlsl v0.4s, v12.4h, v13.h[1]
  smlsl v1.4s, v12.4h, v13.h[1]
  smlsl v2.4s, v12.4h, v13.h[1]
  smlsl v3.4s, v12.4h, v13.h[1]
  smlsl v4.4s, v12.4h, v13.h[1]
  smlsl v5.4s, v12.4h, v13.h[1]
  smlsl v6.4s, v12.4h, v13.h[1]
  smlsl v7.4s, v12.4h, v13.h[1]
  smlsl v8.4s, v12.4h, v13.h[1]
  smlsl v9.4s, v12.4h, v13.h[1]
  smlsl v10.4s, v12.4h, v13.h[1]
  smlsl v11.4s, v12.4h, v13.h[1]
  movi v12.16b, 13
  movi v13.16b, 14

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2503

retire uop (01) | cycle (02) | 03 | 1e | 37 | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1202043174822503561671325120100100120000100120000500960000130020031748300391497331499712010020012000020036000030039317481112020110099100100120000100007610216113003601200001003174930040317493004031749
1202043004022500880251201001001200001001200005004399524130903030039317481665331595512010020012000020036000030039317481112020110099100100120000100007682116113174501200001003004031749300403174930040
12020430039238002316713251201181001200181001200005004399524131729031748300391497331670612010020012000020036000031748300391112020110099100100120000100007610116113174501200001003004031749300403004030040
120204300392250042671325120118100120018100120000500990000131729031748300391497331499712010020012000020036000030039300401112020110099100100120000100007610116113174501200001003174930040300403004031749
120204317482250141671325120118100120018100120000500960000030020030039300401497331596812010020012000020036000030039317481112020110099100100120000100007610116113174501200001003004030040300413004030041
120204317482250061025120100100120000100120000500990000130021031748300391497331596812010020012000020036000030039317481112020110099100100120000100007610116113003701200001003174930040300413004031749
120204317482250041025120100116120000100120000500960000130020030039300401665331596812010020012000020036000030040300391112020110099100100120000100007610116113003601200001003004030041300403004030041
120204317482250041671325120100100120018100120000500960000130020030039300421497331499712010020012000020036000031748300391112020110099100100120000100007610116113003601200001003004030041300403004130040
120204300392380041671325120118100120018100120000500960000130020030039300401497331499712010020012000020036000030039317481112020110099100100120000100007610116113174501200001003174930040317493004030041
120204300402253041671325120101100120018100120000500960000031729030040300391497331499712010020012000020036000030039317481112020110099100100120000100007610116113003601200001003004031749300403174930040

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2503

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 19 | 1e | 37 | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | cf | d2 | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | eb | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
12002430338229000110302512001010120000101200005043995240030020300393003914996031501912001020120000203600003003930039111200211091010120000100000752002160543003600120000103004030040309233004030041
120024309222250000101902512001110120001101200005043940610030020300403003914996031501912001020120000203600003003930040111200211091010120000100000752006160353003600120000103004030040300403004030040
12002430040238000090302512001010120000101200005042834000030020300393003914996031501912003420120000203600003003930039111200211091010120000100010752004160283003600120000103004030923300403174930923
120024300392250000400251200101012000010120000509600000030020300393003914996031501912001020120000203600003003930039111200211091010120000100000752003160533003600120000103004030923300413175130040
120024309222250000400251200101012000010120000509600000030020300393003914996031501912001020120000203600003003930039111200211091010120000100000752004160493003600120000103092330040300403092330040
120024309222250000400251200101012000010120000509600000030020300393003914996031501912001020120000203600003003930039111200211091010120000100000752003160443003600120000103004030040300403004030041
1200243003922500004002512001010120000101200005096000000317313003930039149960315019120010201200002036000030039300391112002110910101200001000007520071601043003600120000103004130041317513004030040
120024300392250000400251200101012000010120000509600000031731309223004015850031501912001020120000203600003092230039111200211091010120000100000752006160343003900120000103004030040300413004030041
120024300392250000400251200101012000010120000509600000030020300393003914996031501912001020120000203600003003930039111200211091010120000100000752003160453003600120000103004130041300413004031749
1200243175022500017050251200101012000010120000509900000030903309223003914996031501912001020120000203600003003930922111200211091010120000100000752004160453003600120000103004030040300403004030040