Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, post-index, 2 regs, 16B)

Test 1: uops

Code:

  ld1 { v0.16b, v1.16b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
62005289132242101031100601004609284430223756300010002000100020005000100001208160492839128886323300020003000287952876111610011000100002004242004101220004242101318591986949315316120370326738152158583283141000156171303214506200010002870628573287022872328709
6200428666224012110000080100459528470002357430001000200010002000500010000410160502818528517310300020003000285822862911610011000100002003442003032220004242101344595937119314316119933326438182265592281831000150821245514252200010002850628653286522865728821
6200428695223011102100030100491528377002369930001000200010002000500010000300160712827628846310300020003000286832863711610011000100002003442003001220004262101305195387076330205820041324338251957602279811000151591288913933200010002866728647286502894028770
6200428765223012111100020100478128351002360430001000200010002000500010000210160412815728562310300020003000286252855511610011000100002002462003001220004442101360894827031322106119972316738251558622280701000156001318314305200010002866528659285422851628683
62004285912220110121000110100490228287002358730001000200010002000500010000200160662826128718310300020003000286132862111610011000100002002362003002220004242001332696666984322805420147316938191866572281071000153611332614441200010002873928719286522873728302
62004287492230121011000801004731284360023655300010002000100020005000100007081606928387287721410300020003000289632885511610011000100002003462003101220026242101323294237047316105720104323638222062602281051000155061260814750200010002889228779292722928128701
620042965922501211220001530104578287792024219300010002000100020005000100001500160412856429258310300020003000291622908011610011000100002003542004001420026482101289492826855314915820728304338092152543283271000162181368615249200010002929929371293252927329177
62004293442200111111000600014573288100024121300010002000100020005000100001700160522855629378310300020003000290082920411610011000100002003342002000220006242101294293056885306605820679309838152462603283721000161831374615344200010002920429217293592933429357
6200429194219010111100050100448128727002418730001000200010002000500010000200160262856029351310300020003000291062908011610011000100002003362005000420004262101303690686838304715920682305338131957583284671000162431377015163200010002933729234292752937629346
62004292742190121020000601004551288050024194300010002000100020005000100001000160572866029381310300020003000291052911911610011000100002004342005001220004402101321191346875311416020685306638191568583284421000163851377615130200010002935129225293122932329293

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.16b, v1.16b }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0057

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | e8 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60205120041965120000014002120043987691097472580106501041000220000401001000020000138511555737372346686601200331200571200601121453112511701003029220000100006020030000100001200571200541150201100991004010010000100000100200033220002010220000222200032701320111216300500346692000050100120244120320120144120309120333
602041203169651002113226826402120225986071098816980588503691008120084432931070621110140048635737276347280701201811202161203401122712311263170505303882012610095605863028810095120236120315315020110099100401001000010000010020006522000800152000022222003210116011119830050004101092000050100120058120058120101120058120058
6020412005996410111000400112004298765109749258010650104100022000040100100002000013851155573353234678700120033120059120057112147311251970100302002000010000602003000010000120057120057115020110099100401001000010000110020002322000201022000022220003210116011119830050004101092000050100120058120058120058120058120058
602041200579641011000040021200459876510974625801065010410002200004010010000200001385161757395323465941012003312005312005811214531125157010030200200001000060200300001000012005812005711502011009910040100100001000001002000332200020002200002222000321011601111984305000414662000050100120061120058120058120059120058
602041200589641011000040021200429876910974625801065010410002200004010010000200001385138757382363467203012003312005712005311214531125157010030200200001000060200300001000012005712005311502011009910040100100001000001002000222200030112200002222000321011601111983205000410652000050100120058120058120059120058120057
60204120057964101000001600212003898769109742258010650104100022000040100100002000013851155573622034670270120033120057120057112145311251570100302002000010000602003000010000120053120056115020110099100401001000010000010020002222000200052000022220003210116011119826050004106102000050100120058120058120058120058120054
602041200539651010000040021200489877010974625801065010410002200004010010000200001385138757335323467786012003312005912005711212931125157010030200200001000060200300001000012005712005411502011009910040100100001000001002000332200020015200002222000321011602111983045000410682143450100120061120058140804120064120054
60204120064965110110004002120042987701097462580106501041000220007401001000020000138512715733580346851101200331200571200571121453112515701003020020000100006020030000100001200571200411150201100991004010010000100000100200032220002000520000222200032381160111198300500046692000050100120058120056120058120061120060
602041200579641010100040021200429877010974625801245010410002200004010010000200001385161957385243529122012036412040612005711214531128247010030200200001003060200300001000012040112005311502011009910040100100001000001002000232200020311420000222210032101160111198340500041410122000050100120058120064120062120062120058
60204120061931111100004002120043987691097532580106501131000220000401001000020000138561425734012346608801201031200611200621121493112499701003020020000100006020030000100001200611200571150201100991004010010000100001100200032220002001220012222220032101160111198390500041010102000050100120062120067120062120062120062

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0055

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60025120051931000000005001120040964841097402580013500121000120000400101000020000138492515733628346546201012003112005612005511216931125347001030020200001000060020300001000012005512005111500211091040010100001000011020000022000000032000022003140007175711983050002141092000050010120052120059120056120056120056
600241200599310000000020011200409648010974425800135001210001200004001010000200001384925157339163465524000120039120055120056112169311253470010300202006810000600203000010000120051120055115002110910400101000010000110200000220000000320000200031400081777119830500021510132000050010120056120056120056120056120056
600241200559310000000014001120041964861097442580013500121000120000400101000020000138494775733628346546600012003312005512005511216631125347001030020200001000060020300001000012005112005111500211091040010100001000041020000022000000002000022003140006177611983050002140132000050010120056120147120056120056120056
6002412005593100000001140011200409648410974425800135001210001200004001010000200001384925157336283466878000120031120151120059112162311254470010300202000010032600203000010000120055120051115002110910400101000010000110200000220000000320000220031400051777119830500021410132000050010120056120056120056120056120056
600241200559310000000020011200209648610974725800135001210001200004001010000200001385085557364823465466000120037120055120055112166311253570010300202000010000600203000010000120055120052115002110910400101000010000410200000220000020020000220031400371787119906500021510132000050010120056120036120059120059120056
600241201429310000000020011200409631010974525800135001210001200004013310000200001384925157337243465466000120031120059120055112162311253470010300202000010000600203000010000120051120051115002110910400101000010000410200000220000000020000220031400071787119830500021414132000050010120052120056120056120056120056
6002412005793000000000146104011202269648410974425800135001210004200004001010000200001384925157336283465466015120031120055120055112166311253370010300202000010000600203000010000120055120051115002110910400101000010000110200000220000000620000220031400071776119830500021410132000050010120056120056120056120056120056
600241200579310000000013400112012596480109744258001350012100012000040010100002000013849483573362834653500001200311200551200551121663112536700103002020000100006002030000100001201461200511150021109104001010000100004102000002200000003200002200314000101768119830500021414132000050010120057120144120056120056120061
600241200559310000000120011200409648410974425800135001210001200004001010000200001384925157336283466998000120037120055120057112162311253470010301162000010000600203000010000120055120055115002110910400101000010000410200000220000010020000220031400061768119896500021414132000050010120056120036120056120052120056
6002412005593100000000180011200409648010974425800135001210001200004013010000200001384925157336283465350000120031120055120055112167311253170010300202000010000600203000010000120059120051115002110910400101000010000410200000220000000020000220031400361776119832500021416132000050010120056120062120056120056120058

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.16b, v1.16b }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch indir (93) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60205120052930000000200001120040987681097442580103501021000120000401001000020000138509235733628346562600120032120051120051112143311252170100302002000010000602003000010000120055120051115020110099010040100100001000001002000002200000000200002020003210011611119828500021410132000050100120056120056120056120056120052
60204120051930000000200001120040987631097442580103501021000120000401001000020000138509235733628346562600120031120057120035112123311251370100302002000010000602003000010000120055120051115020110099010040100100001000001002000002200000000200002020003210011611119828500021414132000050100120036120052120053120056120056
60204120056931000000200001120040989051097402580103501021000120000401001000020000138509235733628346562600120031120055120055112143311250970100302002000010000602003000010000120055120051115020110099010040100100001000001002000002200000000200002020003210011611119828500021610132000050100120060120056120056120056120052
6020412005493100000020000112004098767109744258010050100100012000040100100002000013850575573362834656260012003112005612005511214431125137010030200200001000060200300001000012005112005111502011009901004010010000100000100200000220000000020000202000321001161111982850002141402000050100120056120052120052120056120036
60204120058931000000201001120040986421097402580103501021000120008401001000020000138509235733628346562600120031120386120454112336911251370306302002000010000602003000010000120055120052115020110099010040100100001000001002000000200000400200002020003210011611119828500021410132000050100120056120036120056120056120036
60204120055930000000200001120040987671097442580103501021000120000401001000020000138510395733628346562600120032120055120035112143311251370100302002000010000602003000010000120096120051115020110099010040100100001000001002000002200000000200002020003210012411120163500131410132000050100120056120052120056120060120056
60204120055931000000200001120040987671097402580175501021001720000401001000020000138510395733628346571311120360120056120058112143311251470100302002000010000602003075610787123284123002115020110089956100401001000010000010020000022000202002000020200032100116111198245000210692000050100120053120052120048120054120053
60204120053964000000200101120036987631097422580103501021000120000401001000020000138504595733436346551000120011120051120049112139311251170100302002000010000602003000010000120051120047115020110099010040100100001000001002000002200000000200002020003210011611119920500026002000050100120052120052120048120036120052
60204120051931000000200000120036987631097402580118501021000120000402261000020000138504595733244346551000120027120051120051112139311261770100302962000010000602003000010000120047120047115020110099010040100100001000001002000002200000200200042020003210011611119808500026692000050100120052120052120052120048120048
60204120051931010000200101120036987631097362580103501021000120000402231000020000138512685733436346551000120027120052120051112161311258970100302002000010000602003000010000120161120047115020210099010040100100001000001002000002200000011020000202000321001161111982650002106112000050100120052120036120052120058120049

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0055

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60025120051931010000002001120042964841097402580013500121000120000400101000020000138492515733628346535000120027012005512005511216631125347001030020200001000060020300001000012005512005111500211091040010100001000001020000022000006200002203140006178711983050002101092000050010120056120056120052120056120052
6002412005593100000000680011200409648410974425800135001210001200004001010000200001384925157336283465350101200310120055120055112167311253470010300202000010000600203000010000120056120051115002110910400101000010000010200000220000032000022031400061747119831500021410132000050010120056120056120056120056120059
6002412005193100000000140011200409648310974425800135001210001200164001010000200001384936757336283467092101200310120058120055112406311253470010301162000010000600203000010000120055120051115002110910400101000010000010200000220000002000422031400071777119830500021610132000050010120056120053120063120056120056
600251200559310000000020001200409648410974925800135001210001200004001010000200001385510657337723465350001200310120055120055112166311253470010301162000010000600203000010000120051120051115002110910400101000010000010200000220000002000022031400071776119830500021414132000050010120056120056120056120056120056
60024120055930000011002000120043964801097462580013500231000120000400101000020000138492515733628346535000120031012005512005511216631125347001030020200001000060020300001000012005712005111500211091040010100001000001020000022000400200002203140008178511983050002141092000050010120059120057120056120056120056
600241200559310000000020011200409637310974425800135001210001200004001010000200001384878757336283465350001200310120141120055112166311253570010300202000010032600203000010000120055120051115002110910400101000010000010200000220000002000222031400071776119830500021414132000050010120056120056120056120056120052
6002412005293100000000320011200409648410974425800135001210001200004001010000200001384925157366743465350001200320120051120051112166311253470010300202000010000600203000010000120055120051115002110910400101000010000010200000220000002000022031890071788119830500021414132000050010120056120056120056120056120052
600241200559300000000020001120040964841097452580013500121000520020403751000020000138492515733628346713500120031012005112016111216231125307001030020200001000060020300001000012005512005111500211091040010100001000001020000022000002285200002203140007174711983050002141692000050010120056120056120056120056120052
600241200579300000000050001200409648410974425800135001210001200004013010000200001384925157336283465350001201110120055120055112166311253270010300202000010000600203000010000120055120053115002110910400101000010000010200000220000102000022031400061777119826500021414132000050010120056120056120148120056120052
600241200559300000000020011200409648510974525800335001210001200004001010000200001384878757334843465466001200320120051120051112169911253670010300202000010000600203000010000120055120051115002110910400101000010000010200000220000044882000022031400041784119830500021014132000050010120056120056120056120056120052

Test 4: throughput

Count: 8

Code:

  ld1 { v0.16b, v1.16b }, [x6], x8
  ld1 { v0.16b, v1.16b }, [x6], x8
  ld1 { v0.16b, v1.16b }, [x6], x8
  ld1 { v0.16b, v1.16b }, [x6], x8
  ld1 { v0.16b, v1.16b }, [x6], x8
  ld1 { v0.16b, v1.16b }, [x6], x8
  ld1 { v0.16b, v1.16b }, [x6], x8
  ld1 { v0.16b, v1.16b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020580040620000000056002800252121282524010080100160000801001600002019718365937900800158004080041599533600002401002001600002002400008004080042118020110099100100800008000011001600000271600320103216003261504213151100011600118003808000099160000801008004180042800418004180041
1602048004062000011003800180025201520252401008010016000080100160000200971236692890080015800408004159954359998240100200160000200240000800418004011802011009910010080000800000100160000035160031000341600316131000511000116001180037180000610160000801008004180042800418004180041
16020480040620110000056002800252121211252401008010016000080100160000200680036760160080015800408004059953185999824010020016000020024000080040800401180201100991001008000080000010016001313411600510005016003761120120511000116001180037080000109160000801008004180041800418004180041
16020480040620000000049001800252161621252401008010016000080100160000202965436759581080015800418004059954359998240100200160000200240000800408004111802011009910010080000800001100160000027160031000311600316131270051100011600118003718000067160000801008004180041800438004180041
1602048004062110000006800080025212121125240100801001600008010016000019836723672662008001580040800405995435999824010020016000020024000080040800401180201100991001008000080000110016001212421600130005116003761504213151100011600118003808000090160000801008004180041800418004180041
1602048004062000000004500180025215151725240100801001600008010016000020147603671365008001680041800425995435999924010020016000020024000080040800411180201100991001008000080000110016000002716003200033160032612335005110001160011800370800001010160000801008004180041800418004180041
1602048004062010100005800080025312121125240100801001600008010016000020333193672670008001580040800405995435999824010020016000020024000080040800401180201100991001008000080000110016001313411600500115216003761504113051100011700118003708000099160000801008004180041800418004180041
160204800406210000000290008002521616152524010080100160000801001600002009712366932800800158004080042599543599982401002001600002002400008004080040118020110099100100800008000001001600000351600320003116003360323500511000116001180037180000126160000801008004180041800418004180041
160204800406201100000120028002531615152524010080100160000801001600002009712366932800800158004080040599543599992401002001600002002400008004080041118020110099100100800008000001001600141341160052001501600386150013051100011600118003708000099160000801008004180041800448004180043
160204800406201000000560008002521316212524010080100160000801001600002003136365937900800158004080040599543599982401002001600002002400008004080040118020110099100100800008000001001600131301600500005216003900504312051100011600118003708000090160000801008004180041800418004180041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d cache miss st nonspec (c0) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600258004062110101100420028002520121625240010800101600008001016000020194683669355108001580040800405997636002024026420160000202400008004080040118002110910108000080000010160000001600360039160036613240000502031632800370800001410160000800108004180041800418004180041
16002480040621000000004200280025212121625240010800101600008001016000019846013669341008001580040800405997536002024001020160000202400008004080040118002110910108000080000010160000035160000003616003661320000502021634800370800001414160000800108004180041800418004180041
1600248004062000000000420028002521212025240010800101600008001016000020245703666341008002380040800405997636002024001020160000202400008004180040118002110910108000080000010160000035160036003616000060324000050203163380037080000014160000800108004180041800418004180041
16002480040620000000004200080103212121625240010800101600008001016000020311633669355008001580040800405997536002024001020160000202400008004080040118002110910108000080000010160000035160036003616003661364000050203163380037080000014160000800108004180041800418004180041
160024800406200000000042002800252121218252400108001016000080010160000203116336693550080015800408004059977360020240010201600002024000080040800421180021109101080000800000101600000351600360036160036613243000502031633800370800001410160000800108004180041800418004180042
160024800406210000000042002800250121216252400108001016000080010160000202457036761331080015800408004059976360020240010201600002024000080040800401180021109101080000800000101602360351600360036160036613140000502031633800370800001414160000800108004180041800418004180041
160024800406200000100042002800252121216252400108001016000080010160000202457036793060080015800408004059975360020240010201600002024000080040800401180021109101080000800000101600000421600360036160032613240000502031633800370800001414160000800108004180041800418004180041
16002480040620000000004200280025212121625240010800101600008001016000020245703669355008001580040800405997536002024001020160000202400008004080040118002110910108000080000010160000040160036003616000061040000502031633800380800001410160000800108004180041800418004180041
16002480040620000000000002800252012025240010800101600008001016000020391533672662008001580040800405997536002024001020160000202400008004080040118002110910108000080000010160000035160036003616003661324000150203163380037080000010160000800108004180041800418004180041
16002480040621000000004200280025212016252400108001016000080010160000202457036793120080015800408004059976360020240010201600002024000080040800401180021109101080000800000101600000351600001036160036613240000502031633800370800001014160000800108004180041800418004180041