Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

BIF (vector, 16B)

Test 1: uops

Code:

  bif v0.16b, v1.16b, v2.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule simd uop (54) | dispatch simd uop (57) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map simd uop (7e) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
100420371506116872510001000100026468012018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
100420371506116872510001000100026468002018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
100420371506116872510001000100026468012018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
100420371506116872510001000100026468012018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
100420371606116872510001000100026468002018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
100420371506116872510001000100026468012018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
100420371506116872510001000100026468002018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
100420371506116872510001000100026468002018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
100420371506116872510001000100026468012018203720371572318951000100030002037203711100110000073116111787100020382038203820382038
100420371506116872510001000100026468002018203720371572318951000100030002037203711100110000073116111787100020382038203820382038

Test 2: Latency 1->1

Code:

  bif v0.16b, v1.16b, v2.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0037

retire uop (01) | cycle (02) | 03 | 1e | 3a | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020420037150240611968725101001001000010010000500284768002001802003720037184223187451010020010000200300002003720037111020110099100100100001000710121622197910100001002003820038200382003820038
102042003715000611968725101001001000010010000500284768012001802003720037184223187451010020010000200300002003720037111020110099100100100001000710121622197910100001002003820038200382003820038
102042003715001611968725101001001000010010000500284768012001802003720037184223187451010020010000200300002003720037111020110099100100100001000710121622197910100001002003820038200382003820038
102042003715000611968725101001001000010010000500284768002001802003720037184223187451010020010000200300002003720037111020110099100100100001000710121622197910100001002003820038200382003820038
102042003715000611968725101001001000010010000500284768012001802003720037184223187451010020010000200300002003720037111020110099100100100001000710121622197910100001002003820038200382003820038
102042003715000611968725101001001000010010000500284768002001802003720037184223187451010020010000200300002003720037111020110099100100100001000710121622197910100001002003820038200382003820038
102042003715000611968725101001001000010010000500284768002001802003720037184223187451010020010000200300002003720037111020110099100100100001000710121622197910100001002003820038200382003820038
102042003715090611968725101001001000010010000500284768012001802003720037184223187451010020010000200300002003720037111020110099100100100001000710121622197910100001002003820038200382003820038
102042003715000611968725101001001000010010000500284768012001802003720037184223187451010020010000200300002003720037111020110099100100100001000710121622197910100001002003820038200382003820038
102042003715000611968725101001001000010010000500284768002001802003720037184223187451010020010000200300002003720037111020110099100100100001000710121622197910100001002003820038200382003820038

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0037

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100242003715006119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000101300640424341978510000102003820038200382003820038
1002420037150126119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000640316341978510000102003820038200382003820038
100242003714906119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000640416431978510000102003820038200382003820038
100242003715006119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000101000640316541978510000102003820038200382003820038
100242003715006119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000640316341978510000102003820038200382003820038
100242003715006119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000102000640416331978510000102003820038200382003820038
100242003715006119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000640316341978510000102003820038200382003820038
100242003715006119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000640316431978510000102003820038200382003820038
100242003715006119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000640316431978510000102003820038200382003820038
100242003715006119687251001010100001010000502847680020018200372003718444318767100102010000203000020037200371110021109101010000100000640416431978510000102003820038200382003820038

Test 3: Latency 1->2

Code:

  bif v0.16b, v0.16b, v1.16b
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0037

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102042003715006119687251010010010000100100005002847680120018200372003718422318745101002001000020030000200372003711102011009910010010000100007103161119791100001002003820038200382003820038
102042003715006119687251010010010000100100005002847680120018200372003718422318745101002001000020030000200372003711102011009910010010000100007101161119791100001002003820038200382003820038
102042003715006119687251010010010000100100005002847680120018200372003718422318745101002001000020030000200372003711102011009910010010000100007101161119791100001002003820038200382003820038
102042003715006119687251010010010000100100005002847680120018200372003718422318745101002001000020030000200372003711102011009910010010000100007101161119791100001002003820038200382003820038
1020420037150061196872510100100100001001000050028476801200182003720037184223187451010020010000200300002003720037111020110099100100100001000787101161119791100001002003820038200382003820038
102042003715006119687251010010010000100100005002847680120018200372003718422318745101002001000020030000200372003711102011009910010010000100007101161119791100001002003820038200382003820038
102042003715006119687251010010010000100100005002847680120018200372003718422318745101002001000020030000200372003711102011009910010010000100207101161119791100001002003820038200382003820038
102042003715006119687251010010010000100100005002847680120018200372003718422318745101002001000020030000200372003711102011009910010010000100807101161119791100001002003820038200382003820038
102042003715096119687251010010010000100100005002847680120018200372003718422318745101002001000020030000200372003711102011009910010010000100007101161119791100001002003820038200382003820038
102042003715006119687251010010010000100100005002847680120018200372003718422318745101002001000020030000200372003711102011009910010010000100007101161119791100001002003820038200382003820038

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0037

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1002420037150006119687251001010100001010000502847680200182003720037184440318767100102010000203000020037200371110021109101010000100000816402162219785010000102003820038200382003820038
1002420037150004411968725100101010000101000060284768020018200372003718444031876710010201000020300002003720037111002110910101000010001006402162219785010000102012220038200382003820038
100242003715000611968725100101010000101000050284768020018200372003718444031876710010201000020300002003720037111002110910101000010001006402162219785010000102003820038200382003820038
100242003715000611968725100101010000101000050284768020018200372003718444031876710010201000020300002003720037111002110910101000010000006403162219785010000102003820038200382003820038
100242003715010611968725100101010000101000050284768020018200372003718444031876710010201000020300002003720037111002110910101000010000006402162219785010000102003820038200382003820038
100242003715000611968725100101010000101000050284768020018200372003718444031876710010201000020300002003720037111002110910101000010000006402162219785010000102003820038200382003820038
100242003715000611968725100101010000101000060284768020018200372003718444031876710010201000020300002003720037111002110910101000010000006402162219785010000102003820038200382003820038
100242003715000611968725100101010000101000050284768020018200372003718444731876710010201000020300002003720084111002110910101000010000006402162219785010000102003820038200382003820038
100242003715000611968725100101010000101000050284768020018200372003718444031876710010201000020300002003720037111002110910101000010000006402162019785010000102003820038200382003820038
10024200371500061196872510012121000012100006028476802001820037200371844403187671001020100002030000200372003711100211091010100001000001326402162219785010000102003820038200382003820038

Test 4: Latency 1->3

Code:

  bif v0.16b, v1.16b, v0.16b
  movi v0.16b, 1
  movi v1.16b, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0037

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ld unit uop (a6) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1020420037159061196872510100100100001001000050028476800200182003720037184223187451010020010000200300002003720037111020110099100100100001000007101161119791100001002003820038200382003820038
1020420037150061196872510100100100001001000050028476800200182003720037184223187451010020010000200300002003720037111020110099100100100001000007101161119791100001002003820038200382003820038
1020420037150061196872510100100100001001000050028476800200182003720037184223187451010020010000200300002003720037111020110099100100100001000007101161119791100001002003820038200382003820038
102042003715012573196872510100100100001001000050028476800200182003720037184223187451010020010000204304982017920037111020110099100100100001000007101161119791100001002003820038200382003820038
1020420037155061196878210100100100001001000056528476800200182003720037184223187451010020010000200304922008620037111020110099100100100001002007341161119859100001002003820038200382003820038
102042003715012103196872510100100100001001000050028476801200182003720037184223187451010020010000200300002003720037111020110099100100100001000137101161119791100001002003820038200382003820038
1020420037150061196762510100100100001001000050028476800200182003720037184223187451010020010000200300002003720037111020110099100100100001000007101161119791100001002003820038200382003820038
1020420037150061196872510100100100001001000050028476800200182003720037184223187451010020010000200300002003720037111020110099100100100001000407101161119791100001002008520038200382008520038
1020420037150061196872510100100100001001000050028476800200182003720037184223187451010020010000200300002003720037111020110099100100100001000007101161119791100001002003820038200382003820038
10204200371500611968725101001001000010010000500284768002001820037200371842231874510100200100002003000020037200371110201100991001001000010000997101161119791100001002003820038200382003820038

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0037

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024200371500105196872510010101000010100005028476801200182003720037184443187671001020100002030000200372003711100211091010100001006403163319785010000102003820038200382003820038
10024200371509982196872510010101000010100005028476801200182003720037184443187671001020100002030000200372003711100211091010100001006403163319785010000102003820038200382003820038
10024200371509103196872510010101000010100005028476801200182003720037184443187671001020101722030000200372003711100211091010100001006403163319785010000102003820038200382003820038
100252003715011761196872510010101000010100005028476801200182003720037184443187671001020100002030000200372003711100211091010100001006403163319785010000102003820038200382003820038
1002420037150084196762510010101000010100005028476800200182003720037184443187671001020100002030000200372003711100211091010100001006403163319785010000102003820038200382003820038
1002420037150084196872510010101000010100005028476801200182003720037184443187671001020100002030000200372003711100211091010100001006403163319785010000102003820038200382003820038
1002420037150061196872510010101000010100005028476801200182003720037184443187671001020100002030000200372003711100211091010100001006403163319785010000102003820038200382003820038
1002420037150061196872510010101000010100005028476800200182003720037184443187671001020100002030000200372003711100211091010100001006403164319785010000102003820038200382003820038
10024200371500103196872510010101000010100005028476801200182003720037184443187671001020100002030000200372003711100211091010100001006403164319785310000102003820038200382003820038
1002420037150084196872510010101000010100005028476801200182003720037184443187671001020100002030000200372003711100211091010100001006403164319785010000102003820038200382003820038

Test 5: throughput

Count: 8

Code:

  movi v0.16b, 0
  bif v0.16b, v8.16b, v9.16b
  movi v1.16b, 0
  bif v1.16b, v8.16b, v9.16b
  movi v2.16b, 0
  bif v2.16b, v8.16b, v9.16b
  movi v3.16b, 0
  bif v3.16b, v8.16b, v9.16b
  movi v4.16b, 0
  bif v4.16b, v8.16b, v9.16b
  movi v5.16b, 0
  bif v5.16b, v8.16b, v9.16b
  movi v6.16b, 0
  bif v6.16b, v8.16b, v9.16b
  movi v7.16b, 0
  bif v7.16b, v8.16b, v9.16b
  movi v8.16b, 9
  movi v9.16b, 10

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160204200871500229258010010080000100800005006400000200442006320063321801002008000020024000020063200631116020110099100100160000100010113116312006001600001002006420064200642006420064
16020420063150038258010010080000100800005006400000200442006320063321801002008000020024000020063200631116020110099100100160000100010111216212006001600001002006420064200642006420064
160204200631510147258010010080000100800005006400000200442006320063321801002008000020024000020063200631116020110099100100160000100010111117222006001600001002006420064200642006420064
16020420063151038258010010080000100800005006400000200442006320063321801002008000020024000020063200631116020110099100100160000100010111116222006001600001002006420064200642006420064
16020420063150038258010010080000100800005006400000200442006320063321801002008000020024000020063200631116020110099100100160000100010112116112006001600001002006420064200642006420064
160204200631500418258010010080000100800005006400000200442006320063321801002008000020024000020063200631116020110099100100160000100010111116232006001600001002006420064200642006420064
16020420063150038258010010080000100800005006400000200442006320063321801002008000020024000020063200631116020110099100100160000100010111116212006001600001002006420064200642006420064
16020420063150038258010010080000100800005006400000200442006320063321801002008000020024000020063200631116020110099100100160000100010111116222006001600001002006420064200642006420064
16020420063150038258010010080000100800005006400000200442006320063321801002008000020024000020063200631116020110099100100160000100010111216112006001600001002006420064200642006420064
160204200631500103258010010080000100800005006400000200442006320063321801002008000020024000020063200631116020110099100100160000100010111116222006001600001002006420064200642006420064

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ec | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002420071150000000011327800121280000128000062640000115200312005020050321800122080000202400002005020050111600211091010160000100000000010027821534411104200472201160000102006020060200602006020060
16002420050150000000044278001212800001280000626400001152003120050200503218001220800002024000020050200501116002110910101600001000000000100301152434422113200472201160000102005120051200512006020051
1600242005015000000004427800121280000128000062640000115200312005020050321800122080000202400002005020050111600211091010160000100000000010030115233442274200472201160000102005120051200512006020060
16002420050151000000072327800121280000128000062640000115200312005920050321800122080000202400002005020050111600211091010160000100000000010026841425211124200472201160000102005120051200512005120051
160024200501500000000442980012128000012800006264000011520031200502005032180012208000020240000202222006311160021109101016000010000000001002784142541143200472201160000102005120060200602005120051
160024200501500000000862780012128000012800006264000011520031200502005032180012208000020240000202192006311160021109101016000010000000001002783232522134200472201160000102005120051200512005120051
1600242005015000000120672780012128000012800006264000011520031200502005032180012208000020240000202352021711160021109101016000010000000001002783142522143200562201160000102005120051200512005120051
160024200501500000000442780012128000012800006264000011520031200502005032180012208000020240000202032006311160021109101016000010000000001002683132521144200562401160000102005120051200512005120051
160024200501500000000442780012128000012800006264000011520031200502005032180012208000020240000202122006311160021109101016000010000000001002784232522143200472201160000102005120051200512005120051
160024200501500000000502980012128000012800006264000001520040200592005932180012208000020240000202152006311160021109101016000010000000001002784142522134200472201160000102005120051200512005120051

Test 6: throughput

Count: 16

Code:

  bif v0.16b, v16.16b, v17.16b
  bif v1.16b, v16.16b, v17.16b
  bif v2.16b, v16.16b, v17.16b
  bif v3.16b, v16.16b, v17.16b
  bif v4.16b, v16.16b, v17.16b
  bif v5.16b, v16.16b, v17.16b
  bif v6.16b, v16.16b, v17.16b
  bif v7.16b, v16.16b, v17.16b
  bif v8.16b, v16.16b, v17.16b
  bif v9.16b, v16.16b, v17.16b
  bif v10.16b, v16.16b, v17.16b
  bif v11.16b, v16.16b, v17.16b
  bif v12.16b, v16.16b, v17.16b
  bif v13.16b, v16.16b, v17.16b
  bif v14.16b, v16.16b, v17.16b
  bif v15.16b, v16.16b, v17.16b
  movi v16.16b, 17
  movi v17.16b, 18

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 1e | 37 | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020440059300000184002516020610016010610016000050012800001400934011240038199730319996160100200160000200480000400564003811160201100991001001600001000010110116114003501600001004003940189400394003940039
1602044018829900004002516010010016003610016000050012800001400194003840085199940320043160100200160000200480000400754003911160201100991001001600001000010110116114008201600001004011340039401134003940039
160204400383000000403822516010010016000010016000050055346130400194003840038199730320070160100200160000200480000400384005711160201100991001001600001000010110116114003501600001004003940039400394003940189
1602044003830000004038225160100100160000100160000500128000004001940038400381997313320021160100200160000200480000400384003811160201100991001001600001000010110116114018501600001004003940039400394018940039
16020440237300001506102516010010016000010016000050012800000400194003840038199730319996160100200160000200480000400384003811160201100991001001600001000010110116114010901600001004003940039400394003940039
160204401883000000616502516010010016000010016000050012800000400194005540038200933320023160100200160000200480000400644006311160201100991001001600001000010110116114003501600001004003940039401894003940039
160204400383000000403822516010010016000010016000050012800000400194003840038199730319996160100200160000200480000401144003811160201100991001001600001000010110116214003501600001004003940039400394003940039
16020440038301000736102516010010016000010016000050012800000400194003840038199730319996160100200160000200480000400394005711160201100991001001600001000010110116114003501600001004003940113400394011340039
16020440038301000209401492516010010016000010016000050036929540400194003840188200930319996160100200160000200480000401884003811160201100991001001600001000010110116114003501600001004003940039400394003940039
1602044006530100004002516010010016010610016000050012800000400194003840038199730320070160100200160000200480000400384018811160201100991001001600001000010110116114003501600001004003940039400394018940189

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2502

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 1e | 37 | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | ac | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160024400923010004502516011610160106101600005012800001140019400384003819996032001816001020160000204800004003840039111600211091010160000100000010022313011162114240035208160000104011340039400394003940067
16002440038300000450251600101016010610160000501280000114001940038400752003703200181600102016000020480000400564003811160021109101016000010000001002231423162112440035208160000104003940067400394003940076
1600244007529901206702516011610160000101600005047289511140056401124003819996133200551600102016000020480000400384007511160021109101016000010000001002231314162112440035208160000104003940113400394006440039
1600244003830000067025160010101600001016000050128000011400564006640038199960320055160010201600002048000040038400381116002110910101600001000012271002231304162114440072208160000104007640039400394003940113
160024401123000004502516001010160000101600005054383401140056400384003820006025200451600102016000020480000400384003811160021109101016000010000001002231315162114640035208160000104003940076400394011340039
16002440038300007671002516001010160045101600005012800001140056400384007520006133200551600102016000020480000400384007511160021109101016000010000001002231334162112440035208160000104007640039400764003940083
1600244003830000456739251600601016002310160000501280000114001940075400381999603200181600102016000020480000400564003811160021109101016000010000001002261294162112440035208160000104006440039400764003940039
1600244003830000045149251600111016007610160000501280000114001940038400382000603200181600102016000020480000400744003811160021109101016000010000001002231264412115240035208160000104003940076400394004040039
160024400383000004567251600101016000010160000504728951114001940038400381999603200181600102016000020480000401124003811160021109101016000010000001002231294162114240035208160000104008340039401134003940039
1600244003830000067772516001010160076101600005054383401140019400384007520037133200921600102016000020480000400384003911160021109101016000010000001002231294162112440035208160000104003940039400764003940039