Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

UMLSL (vector, 4S)

Test 1: uops

Code:

  umlsl v0.4s, v1.4h, v2.4h
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule simd uop (54) | dispatch simd uop (57) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map simd uop (7e) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
1004303722061254825100010001000398313130183037303724153289510001000300030373037111001100006373116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372306125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
1004303723061254825100010081000398313130183037303724153289510001000300030373037111001100006373116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038
100430372206125482510001000100039831313018303730372415328951000100030003037303711100110000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  umlsl v0.4s, v1.4h, v2.4h
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020430037224000000066295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296680100001003003830038300383003830038
10204300372250000120064295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
1020430037225000000061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
10204300372250000000271295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
1020430037224000000061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
1020430037225000000061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
1020430037225000000061295482510100100100001001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
10204300372251000000612954825101001001000010010000500427731303001830037300372826532879210100200100002003000030037300371110201100991001001000010000000150710031633296340100001003003830038300383003830038
102043003722500000001374295482510100100100001001000050042773130300183003730037282653287451010020010164200300003003730037111020110099100100100001000000000710031633296340100001003003830038300383003830038
1020430037225000000061295482510100100100321001000050042773130300183003730037282653287451010020010000200300003003730037111020110099100100100001000000700710031633296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 19 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372250000822954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640516222963010000103003830038300383003830038
1002430037225000010922954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
100243003722500009822954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003721100211091010100001000000640216222963010000103003830038300383003830038
1002430037225000011872954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
100243003722500003252954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640316222963010000103003830038300383003830038
1002430037224000010502954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
1002430037225000010192954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
1002430037225000011012954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038
1002430037225000010832954825100101010000101000050427731313006503003730037282873287671001020100002030000300373003711100211091010100001000000640216222963010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  umlsl v0.4s, v0.4h, v1.4h
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d cache writeback (a8) | ac | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
10204300372250912295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037224061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300372826532874510100200100002003000030037300371110201100991001001000010000007101161129634100001003003830038300383003830038
1020430037225061295482510100100100001001000050042773133001830037300852826532874510100200100002003000030037300371110201100991001001000010001007101161129634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372240009492954825100101010000101000050427731316300183003730037282873287671001020101672030000300373003711100211091010100001000064064162229630010000103003830038300383003830038
1002430037226000612954825100101010000101000050427731315300183003730037282873287671001020100002030000300373003711100211091010100001000064052162229630010000103003830038300383003830038
10024300372250002512954825100101010000101000050427731315300183003730037282873287671001020100002030000300373003711100211091010100001000064022162229630010000103003830038300383003830038
1002430037224000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001000064022162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001000064022162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001003064022162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001000064022162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001000064022162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001003064022162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731312300183003730037282873287671001020100002030000300373003711100211091010100001000064022162229630010000103003830038300383003830038

Test 4: Latency 1->3

Code:

  umlsl v0.4s, v1.4h, v0.4h
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | ld unit uop (a6) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500008929548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161129634100001003003830038300383003830038
102043003722500006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000007101161029634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | 0f | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ld unit uop (a6) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002430037225110612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037224000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300841110021109101010000100006402162229630010000103003830038300383003830038
1002430037224000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038
1002430037225000612954825100101010000101000050427731330018300373003728287328767100102010000203000030037300371110021109101010000100006402162229630010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  umlsl v0.4s, v8.4h, v9.4h
  movi v1.16b, 0
  umlsl v1.4s, v8.4h, v9.4h
  movi v2.16b, 0
  umlsl v2.4s, v8.4h, v9.4h
  movi v3.16b, 0
  umlsl v3.4s, v8.4h, v9.4h
  movi v4.16b, 0
  umlsl v4.4s, v8.4h, v9.4h
  movi v5.16b, 0
  umlsl v5.4s, v8.4h, v9.4h
  movi v6.16b, 0
  umlsl v6.4s, v8.4h, v9.4h
  movi v7.16b, 0
  umlsl v7.4s, v8.4h, v9.4h
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire uop (01) | cycle (02) | 03 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160204200891500003925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065
160204200641500008125801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010044001011111611200611600001002023420065200652006520065
1602042006415011711763925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010000001011111641200611600001002006520065200652006520065
160204200641510003925801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065
160204200641500003925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065
160204200641500008125801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065
160204200641500003925801001008000010080000500640000120045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065
16020420064150041703925801001008000010080000500640000120045200642035332280100200800002002400002006420064111602011009910010016000010000031011111611200611600001002006520065200652006520065
160204200641500003925801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065
16020420064150012018825801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010000001011111611200611600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2507

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | 1e | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | branch mispred nonspec (cb) | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160024200581501036625800101080000108000050640000112003420053200530322800102080000202400002005320053111600211091010160000100010041311312021124262005015160000102005420054200542005420054
160024200531501036625800101080000108000050640000112003420053200530322800102080000202400002005320053111600211091010160000100010052311212021126222005015160000102005420054200542005420054
160024200531501037225800101080000108000050640000012003420053200530322800102080000202400002005320053111600211091010160000100010051312232041122252005015160000102005820054200582005420054
1600242005315010315225800101080000108000050640000112003420053200530322800102080000202400002005720057111600211091010160000100010049311252021126252005015160000102005420054200542005420054
160024200531501036625800101080000108000050640000112003420053200530322800102080000202400002005320053111600211091010160000100010043311242041123242005015160000102005420054200542005420054
1600242005315010325425800101080000108000050640000112003420053200530322800102080000202400002005320053111600211091010160000100010048321242021125212005015160000102005420054200542005420054
160024200531501036625800101080000108000050640000112003420053200530322800102080000202400002005320053111600211091010160000100010053311252021120242005015160000102005420054200542005420054
160024200531501036625800101080000108000050640000012003420053200530322800102080000202400002005320053111600211091010160000100010053311262021125242005015160000102005420054200542005420054
160024200531501036625800101080000108000050640000112003420053200535322800102080000202400002005320057111600211091010160000100010051311262041125242005015160000102005420054200542005420054
1600242005315010381125800101080000108000050640000112003420053200530322800102080000202400002005320053111600211091010160000100010055311272021122232005015160000102005420054200542005420054

Test 6: throughput

Count: 16

Code:

  umlsl v0.4s, v16.4h, v17.4h
  umlsl v1.4s, v16.4h, v17.4h
  umlsl v2.4s, v16.4h, v17.4h
  umlsl v3.4s, v16.4h, v17.4h
  umlsl v4.4s, v16.4h, v17.4h
  umlsl v5.4s, v16.4h, v17.4h
  umlsl v6.4s, v16.4h, v17.4h
  umlsl v7.4s, v16.4h, v17.4h
  umlsl v8.4s, v16.4h, v17.4h
  umlsl v9.4s, v16.4h, v17.4h
  umlsl v10.4s, v16.4h, v17.4h
  umlsl v11.4s, v16.4h, v17.4h
  umlsl v12.4s, v16.4h, v17.4h
  umlsl v13.4s, v16.4h, v17.4h
  umlsl v14.4s, v16.4h, v17.4h
  umlsl v15.4s, v16.4h, v17.4h
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 37 | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602044006030000304125160100100160000100160000500239899904002140048400481997331999716010020016000020048000040048400392116020110099100100160000100001011021611400361600001004004940040400494004040040
1602044003930000005025160100100160017100160000500128000014003340039400491997332000616010020016000020048000040039400481116020110099100100160000100001011011611400361600001004004940049400404004940049
1602044003930000004125160100100160187122160000500239899904002940048400481997331999716010020016000020048000040048400391116020110099100100160000100001011011611400361600001004004940040400494004040049
16020440048300000174125160117100160017100160000500128000004002440048400391997331999716010020016000020048000040048400391116020110099100100160000100001011011611400361600001004004040049400404005040040
1602044003930000005025160117100160017100160000500128000004002940048400391997332000616010020016000020048000040039400481116020110099100100160000100001011011611400451600001004005040040400494004040049
16020440048300000175025160117100160017100160000500128000004002040039400481997331999716010020016000020048000040048400391116020110099100100160000100001011011711400451600001004004040049400404004940040
160204400393000001770625160100100160000100160000500128000004002140039400391997331999716010020016000020048000040039400481116020110099100100160000100001011011611400451600001004004040049400404004940040
16020440048300000174125160117100160017100160000500128000004002940048400391997332000616010020016000020048000040039400481116020110099100100160000100001011011611400451600001004004040049400404004040049
1602044003930000004125160117100160000100160000500128000004002940048400481997332002416010020016000020048000040039400391116020110099100100160000100001011011611400451600001004004940040400504004940040
1602044003930000004125160100100160000100160000500239899904002940048400391997331999716010020016000020048000040039400481116020110099100100160000100001011011611400451600001004004040040400404004940049

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2502

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 37 | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002440049299000000462516001010160000101600005012800001110400204003940039199963200281600102016000020480000400394003911160021109101016000010023410024168229163222628400364113160000104004040040400504004040040
160024400483000000005825160010101600001016000050128000001104002040039400391999632001916001020160000204800004003940039111600211091010160000100010026168225163222627400364113160000104004040040400404004040040
160024400393000000015825160010101600181016000050128000001104003040039400391999632001916001020160000204800004003940039111600211091010160000100010026168225163221626400364113160000104004040040400404004040040
160024400483000000015825160010101600001016000050243886501104002040039400391999632001916001020160000204800004004940049111600211091010160000100010026168225163222127400364113160000104004040040400404004040040
160024400393000000015825160010101600001016000050128000001104002040039400391999632002016001020160000204800004003940039111600211091010160000100010024169228163222725400364113160000104004040040400404004040040
160024400393000000015825160028101600001016000050243886501104002040039400391999632001916001020160000204800004003940039111600211091010160000100010026168229163221728400364113160000104005040040400404004040040
160024400392990000005825160010101600001016000050128000001104002040039400391999632001916001020160000204800004003940039111600211091010160000100010026168226163222727400364113160000104004040040400404004040040
1600244004930000001815225160010101600001016000050128000001104002040048400481999632002916001020160000204800004003940039111600211091010160000100010026168229163222420400364113160000104004040050400404004040040
160024400393000000015825160010101600001016000050128000001104003040039400391999632001916001020160000204800004003940039111600211091010160000100010026168227163221427400364113160000104004040040400404004040040
160024400483000000005225160010101600001016000050128000001104002040039400391999632001916001020160000204800004003940039111600211091010160000100010024169226163222914400454113160000104004040040400404004040040