Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

RSUBHN2 (4S)

Test 1: uops

Code:

  rsubhn2 v0.8h, v1.4s, v2.4s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 1e | 3f | 4e | 51 | schedule uop (52) | schedule simd uop (54) | dispatch simd uop (57) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map simd uop (7e) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
100430372200612548251000100010003983130301830373037241532895100010003000303730371110011000073316222630100030383038303830383038
100430372300612548251000100010003983130301830373037241532895100010003000303730371110011000073216212630100030383038303830383038
100430372200612548251000100010003983130301830373037241532895100010003000303730371110011000073216222630100030383038303830383038
100430372300612548251000100010003983130301830373037241532895100010003000303730371110011000373216222630100030383038303830383038
1004303723063612548251000100010003983130301830373037241532895100010003000303730371110011000073216122630100030383038303830383038
100430372300612548251000100010003983130301830373037241532895100010003000303730371110011000073216212630100030383038303830383038
100430372300612548251000100010003983130301830373037241532895100010003000303730371110011000073216222630100030383038303830383038
100430372200612548251000100010003983130301830373037241532895100010003000303730371110011000073116222630100030383038303830383038
1004303723001262548251000100010003983130301830373037241532895100010003000303730371110011000073216122630100030383038303830383038
100430372300612548251000100010003983130301830373037241532895100010003000303730371110011000073116112630100030383038303830383038

Test 2: Latency 1->1

Code:

  rsubhn2 v0.8h, v1.4s, v2.4s
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | 1e | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020430037225106129548251010010010000100100005004277313130018300373003728265328745101002001000020030000300373003711102011009910010010000100000071012162229634100001003003830038300383003830038
10204300372251506129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000071212162229634100001003003830038300383003830038
1020430037225006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000071212163229634100001003003830038300383003830038
1020430037224006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000071212162229634100001003003830038300383003830038
1020430037225006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000071012162229634100001003003830038300383003830038
1020430037225006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000071212163229634100001003003830038300383003830038
1020430037225006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000071012162229634100001003003830038300383003830038
1020430037225006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000071212162229634100001003003830038300383003830038
1020430037225006129548251010010010000100100005004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000071212162229634100001003003830038300383003830038
10204300372250012629548251010010010000100101495004277313030018300373003728265328745101002001000020030000300373003711102011009910010010000100000071212162229634100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 19 | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024300372240000612954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731303001803003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
10024300372250000612954841100101010000101000050427731303001803003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
10024300372240000612954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001833003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038
10024300372250000612954825100101010000101000050427731313001803003730037282873287671001020100002030000300373003711100211091010100001000640216222963010000103003830038300383003830038

Test 3: Latency 1->2

Code:

  rsubhn2 v0.8h, v0.4s, v1.4s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204300372250000012061295392510100100100001001000050042773131300183003730037282653287451010020010166200300003003730037111020110099100100100001000000000071021622296340100001003003830038300383003830038
1020430037225000003061295482510100100100001001000050042773131300183003730037282653287451010020410000204300003003730037111020110099100100100001000000000071021622296340100001003003830038300383003830038
1020430037225000009061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071031622296340100001003003830038300383003830038
1020430037225000000061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071021622296344100001003003830038300383003830038
1020430037225000000061295482510100106100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000020071021622296340100001003003830038300383003830038
1020430037225000000061295482510100100100081001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071021622296340100001003003830038300383003830038
1020430037225000009061295482510100100100001001000050042773131300183003730037282653287631010020010000200300003003730037111020110099100100100001000000000071021622296340100001003003830038300383003830038
1020430037225000000061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071021622296340100001003003830038300383003830038
1020430037225000000061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071021622296340100001003003830038300383003830038
1020430037225000000061295482510100100100001001000050042773131300183003730037282653287451010020010000200300003003730037111020110099100100100001000000000071021622296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100243003723302759861295482510010101000010100005042773130300183003730037282870328767100102010000203000030037300371110021109101010000100000000640516762963010000103003830038300383003830038
100243003722500061295482510010101000010100005042773131300183003730037282870328767100102010000203000030037300371110021109101010000100000000640616762963010000103003830038300383003830038
100243003722500061295482510010101000810100005042773130300183003730037282870328767101602010000203000030037300371110021109101010000100000000640716762963010000103003830038300383003830038
100243003722400061295482510010101000010100005042773131300183003730037282870328767100102010000203000030037300371110021109101010000100000000640716772963010000103003830038300383003830038
100243003722500061295482510010101000010100005042773131300183003730037282870328767100102010000203000030037300371110021109101010000100000000640616662963010000103003830038300383003830038
100243003722500061295482510010101000010100005042773130300183003730037282870328767100102010000203000030037300371110021109101010000100000000640616762963010000103003830038300383003830038
10024300372250120103295482510010101000010100005042773131300183003730037282870328767100102010000203000030037300371110021109101010000100000000640616762963010000103003830038300383003830038
100243003722509061295482510010101000010100005042773130300183003730037282870328767100102010000203000030037300371110021109101010000100000000640716672963010000103003830038300383003830038
100243003722500061295482510010101000010100005042773131300183003730037282870328767100102010000203000030037300371110021109101010000100000000640616772963010000103003830038300383003830038
100243003722500061295482510010101000010100005042773130300183003730037282870328767100102010000203000030037300371110021109101010000100000000640716772970210000103003830038300383003830038

Test 4: Latency 1->3

Code:

  rsubhn2 v0.8h, v1.4s, v0.4s
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102043003722500061295482510100100100001001000050042773131300183003730037282650328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
102043003722500061295482510100100100001001000050042773131300183003730037282650328745101002001000020030000300373003711102011009910010010000100000074111611296340100001003003830038300383003830038
1020430037225000726295482510100100100001001000050042773131300183003730037282650328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
1020430037224000726295482510100100100001001000050042773130300183003730037282650328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
102043003722400061295482510100100100001001000050042773131300183003730037282650328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
1020430037225000251295482510100100100001001000050042773130300183003730037282650328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
1020430037224000726295482510100100100001001000050042773131300183003730037282650328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
102043003722500061295482510100100100001001000050042773131300183003730037282650328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
10204300372320018726295482510100100100001001000050042773131300183003730037282650328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038
1020430037225000251295482510100100100001001000050042773130300183003730037282650328745101002001000020030000300373003711102011009910010010000100000071011611296340100001003003830038300383003830038

1000 unrolls and 10 iterations

Result (median cycles for code): 3.0037

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100250640416342963010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100243640316432963010000103003830038300383003830038
100243003722506129548251001010100001010000504277313130018300373003728287328767100102010000203000030037300371110021109101010000100313640316342963010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100353640316342963010000103003830038300383003830038
100243022722506129548251001010100001010000504277313030018300373003728287728767101592010000203000030037300371110021109101010000100283640316342963010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100084640416342963010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300841110021109101010000100273640416342963010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100303640316342963010000103003830038300383003830038
100243003722506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100326640416342963010000103003830038300383003830038
100243008522506129548251001010100001010000504277313030018300373003728287328767100102010000203000030037300371110021109101010000100303640416442963010000103003830038300383003830038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  rsubhn2 v0.8h, v8.4s, v9.4s
  movi v1.16b, 0
  rsubhn2 v1.8h, v8.4s, v9.4s
  movi v2.16b, 0
  rsubhn2 v2.8h, v8.4s, v9.4s
  movi v3.16b, 0
  rsubhn2 v3.8h, v8.4s, v9.4s
  movi v4.16b, 0
  rsubhn2 v4.8h, v8.4s, v9.4s
  movi v5.16b, 0
  rsubhn2 v5.8h, v8.4s, v9.4s
  movi v6.16b, 0
  rsubhn2 v6.8h, v8.4s, v9.4s
  movi v7.16b, 0
  rsubhn2 v7.8h, v8.4s, v9.4s
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160204200651500392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001001001011211611200611600001002006520065200652006520065
160204200641510392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001001301011221621200611600001002006520065200652006520065
160204200641510392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000601011211611200611600001002006520065200652006520065
1602042006415102292580100100800001008000050064000012004520064200643228010020080000200240000200642006411160201100991001001600001004411101011111621200611600001002006520065200652006520065
160204200641500392580100100800001008000050064000012004520064200643228010020080000200240000200642006411160201100991001001600001004310201011111611200611600001002006520065200652006520065
16020420393150039258010010080000100800005006400000200452006420064322801002008000020024000020064200641116020110099100100160000100013201011211622200611600001002006520065200652006520065
160204200641500392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000001011221612200611600001002006520065200652006520065
160204200641500392580100100800001008000050064000002004520064200643228010020080000200240000200642006411160201100991001001600001000001011221622200611600001002006520065200652006520065
1602042006415005142580100100800001008000050064000012004520064200643228010020080000200240000200642006411160201100991001001600001000001011221612200611600001002006520065200652006520065
1602042006415003925801001008000010080000500640000020045200642006432280100200800002002400002006420064111602011009910010016000010036601011221622200611600001002006520065200652006520065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2507

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 1e | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | a9 | ac | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002420075150100512780012128000012800006264000011020041200512006032280012208000020240000200512005111160021109101016000010014001002782142521145200482201160000102005220052200522005220052
160024200511500114045278001212800001280000626400001152003220051200513228001220800002024000020051200511116002110910101600001001001002684132521134200482201160000102005220052200522005220052
160024200511500002352980012128000012800006264000011520032200512005132280012208000020240000200512005111160021109101016000010019091002784162521146200482201160000102005220052200522005220052
16002420051150090452780012128000012800006264000011520032200512005132280012208000020240000200512005111160021109101016000010057031002984142521146200482201160000102005220052200522005220052
16002420051150000452780012128000012800006264000011520032200512005132280012208000020240000200512005111160021109101016000010034001002784162521155200482201160000102005220052200522005220052
16002420051150000452780012128000012800006264000011520032200512005132280012208000020240000200512005111160021109101016000010033031002984152521166200482201160000102005220052200522005220052
16002420051151000452780012128000012800006264000011520032200512006032280012208000020240000200512005111160021109101016000010054001002685152522135200482201160000102005220052200522005220052
160024200511500006152980012128000012800006264000001520032200602006032280012208000020240000200602005111160021109101016000010000010032115233442134200572202160000102006120052200612006120052
1600242006015100051298001212800001280000626400000152004120060200603228001220800002024000020060200601116002110910101600001001708710031115253442245200572402160000102006120061200612006120061
16002420060150000512980012128000012800006264000001520041200602006032280012208000020240000200602006011160021109101016000010020310030115243442244200572402160000102006120061200612005220061

Test 6: throughput

Count: 16

Code:

  rsubhn2 v0.8h, v16.4s, v17.4s
  rsubhn2 v1.8h, v16.4s, v17.4s
  rsubhn2 v2.8h, v16.4s, v17.4s
  rsubhn2 v3.8h, v16.4s, v17.4s
  rsubhn2 v4.8h, v16.4s, v17.4s
  rsubhn2 v5.8h, v16.4s, v17.4s
  rsubhn2 v6.8h, v16.4s, v17.4s
  rsubhn2 v7.8h, v16.4s, v17.4s
  rsubhn2 v8.8h, v16.4s, v17.4s
  rsubhn2 v9.8h, v16.4s, v17.4s
  rsubhn2 v10.8h, v16.4s, v17.4s
  rsubhn2 v11.8h, v16.4s, v17.4s
  rsubhn2 v12.8h, v16.4s, v17.4s
  rsubhn2 v13.8h, v16.4s, v17.4s
  rsubhn2 v14.8h, v16.4s, v17.4s
  rsubhn2 v15.8h, v16.4s, v17.4s
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire uop (01) | cycle (02) | 03 | 1e | 37 | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602044005230000512516011710016000010016000050012800001400294004840048199733199971601002001600002004800004003940039111602011009910010016000010000001011011611400361600001004004040041400404004040050
1602044004030000412516010010016000010016000050012800000400204003940039199733199971601002001600002004800004003940039111602011009910010016000010000001011011611400451600001004004040053400414004940040
16020440048300017412516011710016000010016000050012800000400204004940040199733199971601002001600002004800004003940039111602011009910010016000010000001011011611400361600001004004040049400494004040049
1602044005229900412516010010016000110016000050023990820400334003940039199733199971601002001600002004800004003940039111602011009910010016000010030001011011611400461600001004004040041400404005040053
1602044004930000542516010010016001710016000050012800001400204003940049199733199971601002001600002004800004003940048111602011009910010016000010000001011011611400451600001004004040040400494005340049
1602044003930000412516010010016001710016000050013199991400334003940039199733200061601002001601442004800004004940040111602011009910010016000010000001011011611400491600001004004140040400404005040040
16020440039300017512516011710016001710016000050012800001400204005240039199733200101601002001600002004800004004840039111602011009910010016000010000001011011611400451600001004004040049400534033340049
1602044004830000412516010010016000010016000050012800001400294003940039199733199971601002001600002004800004003940048111602011009910010016000010000001011011611400361600001004004040049400404004040040
1602044005230000542516010010016000010016000050012800001400204003940039199733199971601002001600002004800004003940039111602011009910010016000010000001011011611400451600001004004040040400404005040040
1602044004830000502516011710016001710016000050025191291400204003940039199733200061601002001600002004800004003940039111602011009910010016000010000001011011611400491600001004004040052400494004040049

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2502

retire uop (01) | cycle (02) | 03 | l2 tlb miss instruction (0a) | 1e | 37 | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160024400403000004625160010101600001016000050128000011540029400484003919996320028160010201600002048042040039400391116002110910101600001013100228211716111182040036209160000104004040050400494004040040
1600244004829900174625160027101600171016000050239899911540029400394004819996320019160010201600002048000040039400391116002110910101600001000100228211916211212240036209160000104010040040400494004040049
160024400483000004625160027101600171016000050128000011540020400394004819996320028160010201600002048000040048400391116002110910101600001013100228211816211181840036219160000104004040040400494004040050
16002440039300012174625160010101600001016000050128000011540020400394004819996320019160010201600002048000040039400481116002110910101600001000100228212016211202240036217160000104004040049400404004040049
16002440049299000566625160010101600001016013950128000011540029401284003919996320067160010201600002048155440048400393116002110910101600001000100228211916211152040045207160000104028440040400494004040040
1600244004829900174625160010101600001016000050239899911540020400394004819996320019160010201600002048000040048400391116002110910101600001000100228211816211221940045216160000104004040040400404004040049
160024400393000004625160027101600001016000050128000011540020400394004819996320028160010201600002048000040048400391116002110910101600001000100228211716211171740036217160000104004940040400994009240049
1600244010030000175525160010101600001016000050128000011540020400394004819996320028160010201600002048000040039400481116002110910101600001000100228211816211211740036209160000104004940040400494004940049
16002440048300001746251600101016001710160000501280000115400294004840039199963200191600102016000020480000400484003911160021109101016000010001002282119161112118400451026160000104004040049400404004940040
1600244004030000175525160010101600001016000050239902711540020400394004819996320019160010201600002048000040039400391116002110910101600001000100228211716211172540045206160000104004940040400494004940040