Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (single, post-index, H)

Test 1: uops

Code:

  ld2 { v0.h, v1.h }[1], [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.002

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.002

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
63005294742372001220110020000468929048001745440021000200210001000200010005000500623848922817294682956731040001000200020004004298832976411610011000100011000001002004010002120013090943269263164107120923338538172772672888210001622813571145621000200010002990029632297532967429601
63004297102381601190000008800047322914400177264002100120021000100020001000500050002385072273429354294823104004100020002002400029352293951161001100010000100002100000001000202001307992876908311496621105332238162163632912310001658113468150611000200010002998929916300362996529912
6300430132241200016000002000048432899200174474000100020001000100020001000500050002386252270429247294933104000100020002000400029468293311161001100010000100002100000001000202001325292396941314776921017324638132566692863510001639513252142811000200010002953829605295822953929476
63004295262372600200100030000469528940001734340001000200210001000200010005000500023880522762293312959132940001002200220024000294382972431610011000100001003021000000010002000013268951869353164136420757327238151669682892710001672613657150091000200010002998730063300522965629727
63004296122381900210100030000473428956001733440021000200210001000200010005000500223830322725292952952431040001000200020004000292752925311610011000100001000001000000010002020013462931368833110117420858339238211770632868510001619113367146021000200010002956829509294272961829455
6300429536239270016000002000046642900600175954002100020021000100020001000500050042390802277029182295433104000100020002000400029388292921161001100010000100002100000001000002001312096226911308256520997335838142572662882510001651113525146521000200010002978729720296172969429627
63004295572402000120000000000461629050011768540101000201010031002200410025015514124428522734295002957131040001000200020004000294672954011610011000100001000021000000010002010013190941669503130126321113328538322170672890210011630513301148651000200010002969829700298122997929709
6300429712239261123000122671760004580290010017550400210002002100010012000100050005010239225227722960329896310400010002002200040002971929668316100110001000010042210011004251000202001314793156964321176820952329538182764672875510011651113311145031000200010002960729768295642975729528
630042960723820112310012267176000470929266001785040021001200610021001200210005005500423867322761293252948513504004100020022000400429555293872161001100010000100390100300280510012020012971955669043151106721059331338232065702885110001637113507146201000200010002988930021298273009530020
6300429539236210021010002000046852898210176614000100020021000100020001000500050012387242273329361295103104000100020002000400029368295401161001100010000100000100000001000202301314994196878314596920972315138232161632866410001644613334146151000200010002951329471294562949529359

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.h, v1.h }[1], [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140316110200100110010100140023139577025901035010030003100004010030000100401236925533120316114109014002401400471400471307233131150801003020010000300006020020000500001400501400471150201100991004010010000100001100100000110000000100010100321001211113970550033066100002000050100140048140051140140140051140051
7020414005010850000011006000014010813958902590103501003000310000401003000010000124478653300421610682201400260140035140050130726313119480418302001000030000602002000050000140050140047115020110099100401001000010000010010000011000000010000110032101801113972250000669100002000050100140049140052140051140132140051
702041400351086000000000130000140036139579025901005010030003100004010030000100001244786533074616114109014002301400471400471307113131150801003020010000300006020020000500001400501400481150201100991004010010000100000100100000110000203100001100321011211113972250000999100002000050100140412140069140057140151140319
70204140050108600000100013000014004113956902590103501003000310000401003000010000124099553312811611410901400230140050140051130723313115080100302001000030000602002000050000140035140047115020110099100401001000010000010010000011000010010000010032361803113972250000069100002000050100140048140051140051140049140052
7020414004710860100000000000014003813955402590103501003000310000401003000010000123694553300421611273901400230140050140050130723313119480396302001000030000602002000050000140051140047115020110099100401001000010000010010000011000000610000000032341801213971750000660100002000050100140055140048140048140048140036
70204140050108500000110024000014003513957702590103501003000310000401003000010000123694353300421611410901400230140050140050130726313119480100302001000030000602002000050000140050140047115020110099100401001000010000010010000001000000010000110132101801113971750000999100002000050100140065140039140052140139140051
702041400471086000000000460000140129139630025901035010030000100004010030000100001244786533004216113891014009101400501400351307263131194803983020010000300006020020000500001400501400501150201100991004010010000100000100100000110000000100001100321011211113971750000996100002000050100140060140051140051140051140048
70204140050108500000000012000014003513955202590103501003000310000401003000010000123692553300421611410901400273140035140050130726313115280100302001000030000602002000050000140051140047115020110099100401001000010000010010000011000010610000010032101801113972250000906100002000050100140425140168140048140052140051
70204140143108600000002226588001140306139589025901035011030007100004024130120100001244786533111316114109014009601402391403181307881713128180411305671012130122609422000050603140683141075515020110099100401001000010000010010000011000012310000100032101801113970550000996100002000050100140074140054140052140051140053
7020414005010860000000009030000140035139589025901035010030000100004010030000100001244786533004216114109014002401400511400501307263131138801003020010000300006020020000500001400501400531150201100991004010010000100000100100000110000103100001100321018011139722500000109100002000050100140055140053140051140051140048

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0056

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
700251400641085100010100200114004413965625900165001030006100004001030000100001245934533351716115415140032140056140056130752313121580010300201000030000600202000050000140056140053115002110910400101000010000010100031110001011110000111100314078791313972550000999100002000050010140057140057140057140057140057
70024140041108511000000020001400411396562590016500103000610000400103000010000124596153336341611572414003214005614005713075831312158001030020100003000060020200005000014005614207634150021109104001010000100000101000211100020104100011111003140158771313972850000969100002000050010140057140057140157140057140057
70024140057108511000000020011401391396102590016500103000610000400103000010000124596153335171611552714003214005614005613075531312168030930020100003000060020200005000014005614005311500211091040010100001000001010002111000200011000011110031651487151413972850000999100002000050010140057140057140057140057140059
7002414005610861000100102001140041139656259001650010300061000040010300001000012459345333517161154151400321401451400561307553131212800103002010000300006002020000500001400561400531150021109104001010000100000101000111100030012051000011113131401487131313988350000069100002000050010140140140061140042140091140057
7002414013510861000011002001140038139658259001350010300061000040010300001000012459705333517161154151400321400561400561307523131215800103002010000300006002020000500001400561400581150021109104001010000100001101000121100480264100001111003140119171313982850000969100002000050010140057140057140057140057140151
70024140059108510001000010001400381396562590016500103001010000400103000010000124596153335171611541514003214005614005613077531312158001030020100003012160020200005000014005614005311500211091040010100001000001010001211000100111000011111031401487171413972850000969100002000050010140055140057140057140057140042
700241400561086100000100200014003813965625900165001030010100004001030000100001245961533359516115724140032140056140053130755313128580010300201000030000602642000050000140056140054115002110910400101000010000010100022110002000410000111100314089191213972850000969100002000050010140057140057140054140054140057
70024140056108610000000014001140041139656259001350010300061000140010300001000012459615333517161154151400321400561400591307553131215800103002010000300006026020000500001400531400561150021109104001010000100000101000221100010001100001111003140138781113972950000069100002000050010140057140057140057140057140057
7002414013410861000200109258801142562139726539008050041300141000340576302331011812528255338491161268581401751402401403491307883013130980735303891081033535665962233655252142778142752111500211091040010100001000001010005111000400116531100031111003140148781313972550000009100002000050010140058140057140042140057140057
70024140056108610001000020000140027139641259001350010300061000040010300001000012459615333517161099081400171400561400561307523131216800103002010000300006002020000500001400561400561150021109104001010000100000101000121100010011100001111103140119181413972850000099100002000050010140057140057140057140057140057

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.h, v1.h }[1], [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0075

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514015210860100000088014006013959725901035010030003100004010030000100001241944533226216116123014004701400751400751307311413117880100302001000030000602002000050000140075140065215020110099100401001000010000010010001110000900010001100032101121111397265000001013100002000050100140141140076140056140076140056
702041400551086010000010014006013959725901035010030003100004010030115100001237197533226216116123014004101400751400741307311613125480724303231008030124604422008050407140075140157315020110099100401001000010000010010006110002029510100031120328011221113995550043131313100002000050100142583142781142790142644140251
70204140264108800000415298801400601396172590103501003000310000402523000010000123717953322621611612301400510140075140075130750313117880100302001000030000602002000050000140075140065115020110099100401001000010000010010000110000700100001100321011211113972550000131013100002000050100140076140056140056140056140184
702041400681086000000013001400401396172590103501003000310005402423000010000123719753318821611498001400510140075140075130751313115880100302001000030000602002000050000140075140076115020110099100401001000010000010010000110000101115100001100321011211113974550000131010100002000050100140076140076140077140078140112
70204140075108600000004300140060139597259010350100300031000040100300001000012371795331499161149800140052014007514007513075131311688010030200100003000060200200005000014007514007511502011009910040100100001000001001000001000010100100001100321011211113974550000131313100002000050100140076140076140066140076140068
7020414007410860000000100140060139616259010350100300001000040100300001000012371895332301161164740140031014007614007513075131311788010030200100003000060200200005000014007514006511502011009910040100100001000001001000011000012303100001100321011211113974550000131016100002000050100140076140076140076140076140120
70204140065108500000002500140060139617259010350100300031000040100300001000012371795331499161161230140052014007514005513075331311808010030200100003000060200200005000014007514007511502011009910040100100001000001001000011000000610000110032101121111397455000001313100002000050100140077140079140076140076140066
7020414007510860000000400140061139617259010350100300031000040100300001000012371795331921161161230140051014007514026213075731311788010030200100003000060200200005000014007614006511502011009910040100100001000001001000011000000010000010032101121111397455000013010100002000050100140056140077140076140056140066
70204140055108600001001300140060139617259010350100300301000040100300001000012371795332262161161230140051014007514007713075131311788010030200100003000060200200005000014005514007511502011009910040100100001000001001000001000015400100001100321011211113974550000131013100002000050100140076140076140077140076140125
70204140075108600000006001400611396172590103501003000310000401003000010000123717953314991611612301400520140075140075130731313117880100304451012230245606902024250610140242140345415020110099100401001000010000010010003110003029653100041100328111111213994550020131010100002000050100140246140263140443140351140334

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0054

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7002514005211250010110040100014002113965025900135001030003100004001030000100001245925533274516113577014001214005214005213074831312188001030020100003000060020200005000014004914004911500211091040010100001000011010000001000000310000110031407874713972450000660100002000050010140037140038140050140050140037
70024140049112500000100210000014003713965325900135001030003100004001030000100001245856533274516115004014002814005214005313075331312128001030020100003000060020200005000014004914004911500211091040010100001000001010000011000000010000110031407874713972450000969100002000050010140053140053140050140050140053
7002414005011250000000040000014003713965225900135002230003100004001030000100001245943533336116115118014002814005214010413075131312118001030020100003000060020200005000014004914005211500211091040010100001000041010000001000000010000110031404877413972450000600100002000050010140053140053140111140053140037
7002414004911250000010070100014003713965225900135001030003100004001030000100001245925533336116115004014003214005214005213075131312118031030020100003000060020200005000014005214005211500211091040010100001000011010000001000010010001110031404877713972150000706100002000050010140053140050140154140053140053
7002414004911240001000070000014002113965225900135002030000100004001030000100001245897533336116113460114002814003614003613073531312118001030020100003000060020200005000014003614004911500211091040010100001000011010000001000000010001010031406876413972150000966100002000050010140053140054140053140137140037
70024140049112500000000135000001400371396372590013500203000310000400103000010000124592553333611611551501400121400361400521307577913143581506303881024230364605062000050000140052140049115002110910400101000010000110100000010007021629810007110031405877713994450000906100002000050010140054140050140050140144140053
7002414004911240000000026410000014003713965525900135001030000100004001030000100001245952533336116115004014002514004914005313073531312088001030020100003000060020200005000014005214003611500211091040010100001000011010000011000000010000100031407874713972450000999100002000050010140050140053140037140053140050
7002414004911250000000070000014003713965225900135001030003100004001030000100001245779533278516113460014002514005214005213073531312128001030020100003000060020200005000014005514004911500211091040010100001000041010000001000000010000110031407878513972150000669100002000050010140037140051140050140050140050
700241400491125000010001030000014002113965225900135001030003100004001030000100001245897533336116115401014002814042314005213073531312128001030020100003000060020200005000014005214005211500211091040010100001000011010000011000000010000010031407878813970850000999100002000050010140053140050140053140053140055
70024140052112400001000488001014003713965525900135001030003100014001030000100001245856533347816115401014002814003614004913075131312118001030020100003000060020200005000014003614004911500211091040010100001000011010000001000000010000100031405874813972150000669100002000050010140053140053140053140053140050

Test 4: throughput

Count: 8

Code:

  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd alu (9a) | inst ldst (9b) | 9e | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400205800426200000000310100300800271000026320100801001600738000080100160000800004447142375767699839930800238004280042299240329999320100200800001600002001600003200008004280042112402011009901001008000016000080000010080000023800150000108001361018600001511011611800390800009080000320000801008004380043800438004380043
4002048004262000000005301002008002716000263201008010016004480000801001600008000044460563757676998392408002380042800422992403299993201002008000016000020016000032000080042800421124020110099010010080000160000800000100800000238001400001480000611418700001511011511800390800009680000320000801008004380043800438004380043
40020480042621000000020000060800271660026320100801001600798000080100160000800004447142375768799839630800238004280042299240329999320100200800001600002001600003200008004280042112402011009901001008000016000080000010080000023800140100168001460100700001511011611800390800009680000320000801008004380043800438004380043
40020480042620010000032000060800271060726320100801001600698000080100160180800004446076375768699839240800238004280042299030329999320100200800001600002001600003200008004280042112402011009901001008000016000080000010080000023800140100080014601118600001511011511800391800000680000320000801008004380043800438004380043
4002048004262000000003101100080027166002632013380100160077800008010016000080000444711137576869982919080023800428004229924223299993201002008000016000020016000032000080042800421124020110099010010080000160000800000100800000238001300001780014611118700001511011611800391800009080000320000801008004380043800438004380043
400204800426210000000310000290800271060026320169801001600428000080100160000800004446522375750299839300800238004280042299020329999320100200800001600002001600003200008004280042112402011009901001008000016000080000010080000023800140000380011611515600001511021611800391800009680000320000801008004380043800438004380043
400204800426210000000320010290800271660026320100801001600738000080100160000800004447142375768699840131800238004280042299240329999320100200800001600002001600003200008004280042112402011009901001008000016000080000010080000015800000000080014601118000001511011611800391800009980000320000801008004380043800438004380043
400204800426210000000310010608002716604263201518010016003380000801001600008000044455263757686998412608002380042800422992403299993201002008000016000020016018232000080042800421124020110099010010080000160000800000100800000238001400001080000611118700001511011611800390800009680000320000801008004380043800438004380043
4002048004262001000003200003008002716600263201478010016001480000801001600008000044471113757689998392408002380042800422990303300003201002008000016000020016000032000080042800421124020110099010010080000160000800000100800000238001401001480013611118710001511011611800391800000680000320000801008004380043800438004380043
400204800426210100100320010608002710000263201348010016004580000801001600008000044455263757376998363208002380042800422992403299993201002008000016000020016000032000080042800421124020110099010010080000160000800000100800000238001400201780013611418700001511011611800390800009080000320000801008004380043800438004380043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd alu (9a) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40002580042643000000092000198002706600263200578001016006080000800131600008000044418373757693998325512108002380042800422994703300203203412080000160000201600003200008004280042112400211091010800001600008000001080000019800172017800166017000150585831187172623109938003918000021910080000320000800108004380043800438004380043
40002480042642000000023000698002710600263200718001016027680000800101600008000044466693757695998329013108002380042800422940318502630104320010208000016000020160000320000800428004211240021109101080000160000800000108000001980016000800176000001506058311811172623101393800391800002500080000320000800108004380043800438004380043
40002480042643000000044000098002716000263203668001016000080000800101600008000044466673757703998393014108002380042801852994703300213200102080000160162201600003200008004280042112400211091010800001600008000001080000021800000008001700000015061613121917282411973800390800002360080000320000800108004380043800438004380043
40002480042643000000060002898002716004263200718001016005480000800101600008000044466503757692998395113108002380042800422994703300223200102080000160000201600003200008004280042212400211091010800001600008000001080000021800160017800006001900150606131181117252510893800390800002460080000320000800108004380043800438018580043
400024801836430000100330000980169106017232001080010160044800008001016000080000444666937576769983290121080023800428018329947033002132001020800001601662016000032000080042800421124002110910108000016000080000010800000218001500148001761171900150625832181017262310101038003918000028510080000320000800108004380043800438004380043
4000248004264300000103500016980027106052632005580010160060800008001016033280000444664037577029984208121080023800428004229947113263002132001020800001600002016000032000080042800421124002110910108000016000080000010800000080015001780017612000015062613219111726241210938003908000023910080000320000800108004380043800438004380043
400024800426430000000400003980027166045263200708001016006080000801841600008000044463443757695998390812108002380042801852994703300213200102080000160000201600003200008004280042112400211091010800001600008000001080000020800170011928001661140001506064311810172623106738003918008521813080000320000800108004380043800438004380043
400024800426430000000480000980027166002632007180010160000800008001016000080000444666737576949983902141080023800428004229947033002132001020800001600002016000032000080042800422124002110910108000016000080000010800000198001700168001561171900150655833191117282511873800390800002330080000320000800108004380043800438004380043
400024801836430000000300000980027166022632005780010160061800008001016000080000444666937576769983290131080023800428004229944033002132001020800001601662016000032033280183800421124002110910108000016000080000010800850080016002355801006115000150866132188242623109938217718016621710080000320000800108032880043803288018580473
4000248047064400001181819617600409801721662044683204768017716055580166800941601668024944419603755958998154713108013480184801842990721527301853203422080083160329201601663203328018780611171240021109101080000160000800000108016801980185102376800980016000151155831181024262310121338003908016622011080000320000800108047580185803298018580328