Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (post-index, 16B)

Test 1: uops

Code:

  ld2r { v0.16b, v1.16b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.002

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.002

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
630052952323720010000003000047352908711175444008100020061000100020001000500050002387813002270802940929530310400010002000200020002922129495116100110001000010000310010103100021201310693396935321505520853333638161257602888510001629513316144631000200010002953829536296112954129574
630042954523801001000003000046552905011174524006100020061000100020001000500050002387413002274602934329659310400010002000200020002944429367116100110001000010000410020101100121301327394926988319816321028327938042066532888010001625913225146261000200010002965129521297162956929577
63004296662391101100000301004610290801117579400610002006100010002000100050005000239481800227640295372963731040081002200220002000294162953711610011000100001000421004104435100021341332794077011321105521026322738122160552892510001635013256145621000200010002959229564296782964329689
6300429557239201010002339617610045762913700174554010100120061001100020021000500050412389210002282602943229849134740021000200220022004295832946441610011000100001003221001002010003110131469451698430850592104434193811964562909910001647113424145011000200010003023930311295992950729642
630042963723701001000101500004621289640017408401010002004100010002000100050005000238826102275102934629406310400010002002200020002927629306116100110001000010000310000003100001301314994926942313116520837327738161856572861310001619913203147821000200010002951229405294582945529385
63004294522362000000000400004655288961117483400610002004100010002000100050005000239520002281302927429469310400010002002200020002953529410116100110001000110040310000004100020201313893246961316405720874336238131060612872110001629713183142021000200010002931629351293462956229396
63004294292270000100001300004740289190017349400410002004100010002000100050005000238604002270002931329422310400010002000200020002919929297116100110001000010000310000000100010201306794006931317106320766325238111757582859710001632913135142061000200010002965029368293242940829538
63004294672280100000100000004772288780017328400010002000100010012000100050005000238543002274402922029359310400010002000200020002923029265116100110001000010004010010000100010001324892286964315416920655329338041457542873610001602813239143611000200010002944029424294242946129454
63004293722280100000000200004706290260017298400010002000100010002000100050005000238145002274302924529419310400010012000200020002929629301116100110001000010002010000000100121001334093556908313305020797339038081656602877310001613013266144911000200010002942529448295912947229399
63004294802280100000000000004559289481117302400610002004100010002000100050005000238620002271802928129468310400010002000200020002938429296116100110001000010000210000000100020201326393206912316605720833333938152065602868410001618213263143811000200010002928929310293802938529232

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.16b, v1.16b }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0097

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140051108600100000010001400201395962590103501003000310000401003000010000123720653313971611456011400300140051140051130711031311578010030200100003000060200200003000014005414005111502011009901004010010000100000100100000110000000010000001000032101121111397245000013013100002000050100140036140055140055140055140036
70204140056108600000000013000140039140785259010350100300031000040100300001000012372065331549161197220140030014005414005413071103131154804233020010000300006020020000300001400551400541150201100990100401001000010000010010000001000000031000010100003210112111139724500001300100002000050100140055140055140055140149140036
7020414003510860000000001201014002013959325901035010030003100004010030000100001237134533182916114560014003001400541400541307300313115780100302001000030000602002000030000140054140035115020110099010040100100001000001001000001100000000100001010000323311212113970550000131310100002000050100140055140052140060140055140052
70204140054108601100000029800014003913959625901035010030003100004010030000100001237224533158816114560014003301400541400541307300313115780100302001000030000602002000030000140057140054115020110099010040100100001000001001000001100000003100001010100321011211113972450000131313100002000050100140056140066140055140055140052
702041400541085000000000100014002213959825901035010030003100004010030000100001237867533147416114560014003001400541400351307270313115780100302001000030000602002000030000140051140051115020110099010040100100001000001001000001100000103100001010000321011211113972450000131013100002000050100140056140036140152140056140055
70204140054108601000000010001400391395932590103501003000310000401003000010000123767453316681611273901400300140035140035130737031311578010030200100003000060200200003000014005214015211502011009901004010010000100000100100040110000020181000011235000003210110112139722500006610100002000050100140051140051140052140137140051
70204140143108600000000010001400361395932590103501003000010000402433012110000124478653300421611410901400230140050140050130711031311958010030342100003012160200200943012014005014008011502011009901004010010000100000100100000110000060310000101000032361801113972250000969100002000050100140051140051140048140148140036
7020414007010850000000001000140038139552259010350100300031000040417300001000012447865330042161141090140026014005014005013071103131194801003020010000300006044620000300001400501400501150201100990100401001000010000010010001011000100030610000101000032101801113972350020999100002000050100140143140051140140140053140054
7020414016411030002100222801760014012813967711190161501223001110005405383035210079125442953354441611593201401630140346140328130814016131364807213057310081302426069620160303671402341403164150201100990100401001000010000010010003011000401012100001010000321018011139722500009610100002000050100140051140051140036140048140036
7020414007810860000001001000140083139555259010350100300031000040100300001000012447865330746161145601140026014004714005013072603131195801003020010000300006020020000300001400501400501150201100990100401001000010000010010000011000001045310000101000132101801113972250000606100002000050100140051140054140048140051140048

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
700251401491090211011113326435200140318139635259001350010300001000040010300001000012459525332706161151810140032014005414005413075327131213800103002010000300006002020096300001400521400511150021109104001010000100001101000000100000300100001100314063873213971950000669100002000050010140051140048140048140051140055
70024140047108600000000001300014003613965025900135001030000100004001030141100001245907533270616114779014002601400471400351307343131209800103002010000300006002020000300001400501400471150021109104001010000100001101000001100000000100001100314032873213972250000999100002000050010140052140054140051140051140051
7002414005010860000000000100014003613965025900135001030003100004001030000100001245925533317316114779014002601402411400501307513131209800103002010000300006002020000300001400501400471150021109104001010000100000101000000100000000100000100314033872213972250000099100002000050010140051140051140051140036140051
7002414004710850000000000100014003513965025900135001030003100004001030000100001245907533328516113346014002601400511400501307523131209800103002010000300006002020000300001400521400501150021109104001010000100000101000001100000000100001101314032982213972350000909100002000050010140051140051140051140051140051
70024140050108600000000001200014036713965025900135001030003100004001030000100001245847533328516114779014002601400351400351307493131194800103002010000300006002020000300001400501400351150021109104001010000100000101000001100000103100001100314032872213972250000996100002000050010140051140051140050140051140051
7002414005010850000000000100014008313965053900445001030019100024001030000100001245907533332316114898014002601400501400471307463131209800103002010000300006002020000300001400501400471150021109104001010000100000101000001100000100100000000314032873313972250000000100002000050010140051140051140053140049140051
7002414003510850000001100430001400351396352590013500103000010000400103000010000124593353335581611501301400310140051140050130749313120980010300201000030000600202000030000140053140052115002110910400101000010000110100000110000020846100001100314003873213970750000669100002000050010140036140048140051140051140051
70024140052112800000000006000140035139652259001050010300031000040010300001000012458475333285161147791140026014005014005013074931312098001030020100003000060020200003000014003514004711500211091040010100001000001010002201000710212905100021020379903873213994650038961936100002000050010140266140312140318140250140318
7002414033811260000001112397176011400201396502590013500103000310000400103000010000124590753327061611477901400110140050140050130749313120980010300201000030000600202000030000140035140047115002110910400101000010000010100000110000000121000010203140028733139719500001069100002000050010140051140051140051140051140051
70024140035108600000000000001140035139650259001350010300031000040010300001000012458795333285161147790140026014005114005013074931312098001030020100003000060020200003000014005014003511500221091040010100001000001010000011000000001000011003163021102213972250000960100002000050010140051140051140036140036140051

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.16b, v1.16b }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0075

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 40 | 4c | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514015511251000001880000140050001396072590120501733000310002401003023410039123720653322621611612301400521400751400751307513131181801003020010000300006020020000301211400551400651150201100991004010010000100000100100000110000000100000100321011211113972550000131013100002000050100140076140076140076140076140076
702041400751125000000100000140060001396172590103501103000310000401003000010000123717953318821611727101400981400761400651307413131178801003020010040300006020020000300001400741400651150201100991004010010000100000100100000110000000100001100321011211113974850000101313100002000050100140076140076140076140076140076
7020414007511250000006000001400500013961725901035010030003100004010030000100001237202533226216116123014005114109114032513078615131258801003044511289320626020020082301231401621400653150201100991004010010000100000100100260110004001281510003112032801116111417665002913130100002000050100140318140344140248140361140164
702041403291126111033396264000014006000139597259010350100300031000040100300001000012371795331499161161230140041140075140055130751313117880100302001000030000602002000030000140075140065115020110099100401001000010000010010000011000010010000110032101121111397455000013130100002000050100140076140076140076140076140076
7020414007611250010001300000140060001396172590103501003000310000401003000010000123717953314991612473201400511400751400751307513131178801003020010000300006020020000300001400751400651150201100991004010010000100000100100000110000103100001100321011211113974550000131013100002000050100140076140076140076140076140075
70204140075112500000010000014006000139617259010350100300031000040100300001000012371795332262161161230140051140075140075130751313117880100302001000030000602002000030000140075140065115020110099100401001000010000010010000011000000310000110032101121111397445000013140100002000050100140075140076140076140076140076
7020414007511250000001600000140060001396172590100501003000310000401003000010000123719753322621611612301400511400781400761307513131178801003020010000300006020020000300001400751400651150201100991004010010000100000100100000110000103100000100321011211113973550000131013100002000050100140076140120140056140056140076
702041400771125000000000000140060001395972590100501003000310000401003000010000123708353314991611498001400311400741400551307313131177801003020010000300006020020000300001400751400741150201100991004010010000100000100100000010000106100000100321011211113974650000131013100002000050100140076140076140077140076140076
702041400761125000000100000140040001395972590103501003000310000401003000010000123717953322621611727101400511400551400751307313131178801003020010000300006020020000300001400751400681150202100991004010010000100001100100000010000000100001100321011211113974550000131013100002000050100140076140076140076140056140078
702041400551125000000220000596140414221531396139290103501003000010000401003000010000123741753327681611636001400551400751400751307513131178801003020010000300006020020000300001400751400651150201100991004010010000100000100100000110000103100001000321011211113973850000131513100002000050100140076140076140066140076140076

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0052

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
700251400501085000100000130001400401396522590010500103000310000400103011610000124597953333611612257201400251400521400521307511613121180010300201000030000600202000030000140052140049115002110910400101000010000010100011100000001000101003140138781313972450000969100002000050010140053140053140148140054140053
7002414004910850001000001000140038139652259001350010300031000040010300001000012459255333361161213780140028140037140052130751313121180010300201000030000600202000030000140052140050115002110910400101000010000010100001100000001000011003140138781313972150000969100002000050010140037140053140053140058140050
7002414003710850001010008471080014003713964925900135001030003100004015130000100001245925533344016121657014002914005214005213075131312118001030020100003000060020200003000014005214005211500211091040010100001000001010000110000003100001100314013104714139708500009109100002000050010140053140053140053140053140053
700241401411086000100100100014003713965225900135001030003100004001030117100001245943533336116126313014003414005214005213075131312118001030020100003000060020200003000014014314005311500211091040010100001000001010001110000003190100001100314088713813972450000969100002000050010140053140053140056140053140053
70024140052108600010000018800140037139653259001350010300031000040010300001003912459255333361161208490140012140052140052130751313121180010300201000030000600202000030000140052140049115002110910400101000010000010100001100000032051000011003140148713813972450009996100002000050010140413140037140147140053140055
700241401441086000000032265176101402251396958290070500313001110003404343011810158125126253382921613816501401741401441403361308153591326148867830140101613024560744202443036314024314032341500211091040010100001000001010004010006009595100041100314088713713972450000969100002000050010140053140037140037140037140053
70024140052108600010000010001400211396362590013500103000310000400103000010000124595253333611612116501400281400371400541307353131195800103002010000300006002020000300001400521400491150021109104001010000100000101000011000010310000110031401387141313970850000699100002000050010140053140053140055140053140038
70024140052108600000110013880014003713965225900135001030003100004001030000100001245925533336116125366014002514005214005213075231311958001030020100003000060020200003000014005214005111500211091040010100001000001010000110000000100001100314088713813972450000999100002000050010140053140051140053140054140053
70024140049108600010110060001400371396522590013500103000310000400103000010000124585653333611611969801400281400521400541307533131211800103002010000300006002020000300001400521400521150021109104001010000100000101000011000000010000110031401387131313972450000099100002000050010140053140053140053140037140053
70024140052108500010010018000140039139652259001350010300031000040010300001000012459255333400161238000140028140053140049130735313121180010300201000030000600202000030000140052140052115002110910400101000010000010100000100001001000001003140138714813972450000969100002000050010140053140053140053140053140053

Test 4: throughput

Count: 8

Code:

  ld2r { v0.16b, v1.16b }, [x6], x8
  ld2r { v0.16b, v1.16b }, [x6], x8
  ld2r { v0.16b, v1.16b }, [x6], x8
  ld2r { v0.16b, v1.16b }, [x6], x8
  ld2r { v0.16b, v1.16b }, [x6], x8
  ld2r { v0.16b, v1.16b }, [x6], x8
  ld2r { v0.16b, v1.16b }, [x6], x8
  ld2r { v0.16b, v1.16b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2402058004162011001000310018002616610253201648010016006280000801001600008000044082033758374982636380022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800076238002600258001961262460511011611800381800000980000160000801008004280042800428004280182
240204800416201000110031000800261661225320162801001600628000080100160000800004408202375837398271508002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080008623800260212018001961262361511011615800381800009980000160000801008004280182800428004280042
2402048004162110110000320018002616612253201628010016006480000801001600008000044082113761941982636380022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800068238002500258001961252370511031611800380800000980000160000801008004280042800428004280042
240204800416201111110060008002616612253201648010016006280000801001600008000044081943758377982666080022800418004149924034999932010020080000160000200160000160000802178004111802011009910010080000800000100800088238002500268000061252370511011611800381800009980000160000801008004280042800428004280042
2402048004162010110000320118002616612253201668010016006480000801001600008000044082113758368982636380022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800001100800077238002600258000061262370511011611800381800000980000160000801008004280042800428004280042
2402048004162111001100430018002616612253201648010016006280000801001600008000044082053758375982638880022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800096238002600288001861262361511011611800380800009980000160000801008004280042800428004280042
24020480041621111000001750118002616612253201628010016006480000801001600008000044082113758363982636380022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800001100800078238002611268001861252370511011611800381800009980000160000801008004280042800428004280042
240204800416211001000070118002616612253201648010016006280000801001600008000044081983758363982549580022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800067238002610288001861262370511011611800381800009680000160000801008004280042800428004280042
240204800416211000000060008002610612253201628010016006280000801001600008000044082113758377982660480022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800078080026012880019612524705110116258003818000010980000160000801008004280042800428004280042
24020480041620111000004101080026166122532016480100160062800008010016026580000440819637583779826380800228004180183499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000772480025012680019012623605128116218003808000012980000160000801008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
240025800416200100000160000800261060442532005080010160032800008001016000080000440769637583709825697800228004180041499470350022320010208000016026020160000160000800418004111800211091010800008000011080000014800130100800130191805020101566800381800009680000160000800108004280042800428004280042
24002480041620010000029600008002600600253200488001016003280000800101600008000044076963758361982513980022800418004149947134350437322666208013616000020160000160000800418062711800211091010800008000001080087020800090001780014609180502061666800381800009080000160000800108004280183800428004280042
240024800416200000000900008017206000253200108001016004080000801421600008000044099573758375982551180022801818004149947035002232001020800001600002016000016000080041800411180021109101080000800000108000000800140101580009619180502041655800381800009680000160000800108018380042800428004280042
240024800416210000000200000800261660025320054800101600408000080010160000800004407696375836898347158002280041800414994703500223200102080134160000201600001600008004180041118002110910108000080000010800000148271101014800876110180502031565800380800006680000160000800108004280042800428004280042
24002480041620000000015100008014716607253200508001016004080000800101600008000044076963758373982570080022800418004149947035002232001020800001602652016000016000080041800411180021109101080000800000108000001480014000080000619180502031565800381800006680000160000800108018280042800428004280183
2400248004162000000002500008002600600253200508001016022080000800101600008013444076963758372982549380022801808018349947035002232001020801431600002016000016000080041800413180021109101080000800000108000000800140001080000619180502041677800380800009080000160000800108004280042800428004280042
2400248004162101000000000080026166002532004280010160038800008001016000080134440769637583619825511800228004180041499740350022320541208000016000020160000160000806208004111800211091010800008000001080088014800140001480010619180503981656800380800009980000160000800108004280042800428004280181
2400248004162000000001600008002616600253200508001016003280000800101600008013444076963758370982551180022800418018249947032500223200102080000160000201600001600008004180041118002110910108000080000010800000148001301013800136114140502031666800381800009080000160000800108004280042800428004280042
240024800416210000000320000800261660025320048800101600308000080010160000800004407696375836898255118002280041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800000148001400213800146110180502031566800380800939680000160000800108004280042800428004280042
240024800416211000000198800080168166044253204168001016004080000800101600008000044099683758371982568280022823178236149994198750103321615208013316027120160272160536803228018131800211091010800008000001080188014815810121442801086110181503843465803640800940680000160000800108018480189804738018480182