Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (single, D)

Test 1: uops

Code:

  ld2 { v0.d, v1.d }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.002

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.002

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
63005293382195101300010045732875300017210300220021000200010005000238063227322917729368310300010002000100040002915329171116100110001000010003100003100021212880920068373047080206143094381311534928306162971417615038100020002926029257291742916529265
63004293032205100000700045772884800017182300020021000200010005000238027227032905729311310300010002000100040002928629298116100110001000010003100000100001313076937168153173059206513131381220545328342162711399515090100020002935729250293422934029360
630042932422060000001310045902873500017236300420021000200010005000238067227512923229348310300010002000100040002916929246116100110001000010003100200100220312771907967993021259206623020380919595928369163051411415215100020002928929303293002926629276
63004293842187000000210047152879201017189300220021000200010005000239077227252911529407310300010002000100040002915429176116100110001000010003100000100001212892912468313035149206453041380812475228279161671399015276100020002932429273292582933129274
630042947921820001002100475228731000172003002200010002000100050002387382269929053292761310300010002000100040002921029241116100110001000010002100000100032313137913567963069064206603053380810505228325163131395415123100020002925829239292322933929284
63004292652205000100800045712885801117243300020021000200010005000239074227312907829257310300010002000100040002916129222116100110001000010003100002100021312906911567953108155206513036380815535228331163981408915070100020002931229290292072929029236
630042929721980000001910047112882901017120300220021000200010005000239025226512904729349310300010002000100040002918929181116100110001000010003100000100020313649919970863068155206273093381313605128336161821407815195100020002920929278292692939629354
63004293022204100100400048842872300017187300220001000200010005000239194227372908629181310300010002000100040002919229154116100110001000010000100003100021312784908768293040154205543050380415575528390160561395915168100020002925529207292952926329271
63004293582194000100400044952886800017194300020001000200010005000238694227112909629257310300010002000100040002908529110116100110001000010003100000100031212861901868433068153206293088381312494728502163661417715082100020002919629307292712929229283
63004292472202100110410045682877001017183300220021000200010005000239053227372904729274310300010002000100040002912429318116100110001000010003100000100000012683905168313022262206583046381116605728456156231420815096100020002927429349293162931129240

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.d, v1.d }[1], [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0053

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 23 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140110104911000002100114003813959213933212935925801064010030006100003010030000100001264381669368220082149001400290140053140053130549031311497010030200100003000060200100005000014005314005311502011009910040100100001000001001000331100010127710000111110321011281113956540000066100002000040100140054140054140054140054140054
70204140053104911110001100114002613945013934512935925801064010030006100003010030000100001275377669459420082581001400290140053140053130561031311497010030391100003000060200100005000014005314005311502011009910040100100001000001001000130100010125310000110120321011281113956540000000100002000040100140054140054140042140042140054
702041400531049111111011000140032139431139338129353258010340100300031000230100300001000012643556693388200812630014002301400471400471305430313111870100302001000030000602001000050000140047140047115020110099100401001000010000010010000011000000010000101000321011281113956540000666100002000040100140054140054140046140054140054
70204140085104911110002100114002613959213934512935925801064022330054100003010030000100001264381669368220082149001400290140053140053130561031311497010030200100003000060200100005031514007814004111502011009910040100100001000001001000131100020128310000111130321011281113956540000066100002000040100140054140054140054140054140054
702041400531050111100021001140038139450139345129359258010340100300031000030100300001000012643586693682200803610014002901400531400531305490313114970100302001000030000602001000050000140053140053115020110099100401001000010000110010001211000201110000110100321011281113956540000606100002000040100140054140054140054140054140054
70204140053104911010002000114003813945013934512935925801064010030006100003010030000100001264381669368220082149001400290140041140053130561031311377010030200100003000060200100005000014004114004111502011009910040100100001000001001000211100020036110000111110321011281113956540000606100002000040100140054140054140042140054140054
702041400531050101100020101140038139450139345129359258010640107300061000030100300001000012643816693682201012390014001701400531400531305610313116670100302001000030000602001000050000140053140053115020110099100401001000010000010010001211000201110000111100321011281113956540000666100002000040100140054140054140042140054140042
702041400531049101100020001140038139592139345129359258010340100300061000030100300001000012643136693682200821490014002901400531400531306360313114970100302001000030000602001000050000140053140053115020110099100401001000010000010010003211000201110000110110321011281113956540000666100002000040100140042140054140054140054140054
702041400531050111100020100140026139592139345129359258010640100300031000030100300001000012644176693682200821490014002901400531400531305610313114970100302001000030000602001000050000140053140041115020110099100401001000010000010010002311000201110000111100321011281113955440000660100002000040100140054140054140042140042140054
70204140053105010100002010114003813945013934512935925801064010030006100003010030000100001264381669368220091077001400290140053140053130561031311377010030200100003000060200100005000014005314005311502011009910040100100001000011001000111100010126210000010110321011281213956540000066100002000040100140054140054140042140042140054

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 24 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140051104900000000014003613948513934412934225800134001030003100003001030000100001264783669279120081843111400271400511400511305693131176700103002010000300006002010000503191400511400541150021109104001010000100000101000011000001010000110031406121241395734000026010100002000040010140052140036140052140055140052
70024140051104900000000014003613948713934412934125800134001030003100003001030000100001264783669358420079451011400271400511400511305693131177700103002010000300006002010000500001400351400511150021109104001010000100000101000011000070010000111031402121421395574000028100100002000040010140052140055140036140055140052
700241400511049000060100140020139507139344129357258001340010300001000030010300001000012647836693731201051410114002714005114005113056931311777001030020100003000060020100005000014005114005111500211091040010100001000001010000110000000100001100314021212413955740000281010100002000040010140055140052140055140036140052
70024140035104901101010014003613956413934412935725800134001030003100003001030000100001264783669377920079451011400271400351400541305723131161700103002010000300006002010000500001400541400351150021109104001010000100001101000011000000010000000031403121351395734000028100100002000040010140036140055140052140036140052
700241400351049000010000140020139507139325129357258001340010300031000030010300001000012647836693584200818430114001114005114005113056931311767001030020100003000060020100005032014005814005111500211091040010100001000001010000010000000100001000314041212413957640000281013100002000040010140052140052140052140055140036
70024140051104901101000014002013948713934612936025800134001030003100003001030000100001264719669358420081843011400271400351400511305693131176700103002010000300006002010000500001400351400511150021109104001010000100001101000011000000010000110031402121241395734000028010100002000040010140052140052140052140052140052
700241400511049000010000140036139507139346129341568002740010300031000030010300001000012675536698116200818430114009614005114003513056931311797001030020100003000060020100005000014005114005111500211091040010100001000001010000110000000100001100314041204213957340000281010100002000040010140052140036140055140052140074
70024140051104900045500100140036139487139344129357258001340010300031000030010300001000012647196693584200894780114003014005114003513059131311797001030020100003000060020100005031414003514005111500211091040010100001000001010000110000100100001100314021212413957640000281110100002000040010140052140052140052140036140036
700241400511049000010100140036139564139346129360258003040010300031000030010300001000012648196693731200806090114001114005114003513056931311767001030020100003000060020100005000014005114005411500211091040010100001000001010000110000000100001100314021212413957640000281013100002000040010140055140055140055140036140055
700241400511049000027010014002013948713934612935725800134001030003100003001030000100001264783669358420079451011400271406451402651305693131176700103002010000300006002010000505311400561400351150021109104001010000100000101000011000000010000110031404121241395734000028013100002000040010140055140036140055140055140055

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.d, v1.d }[1], [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0682

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140496105411010021011406671402761399721299852580103401003000610000301003000010000126994667238212017257411406581406821406821311873131777701003020010000300006020010000500001406821406821150201100991004010010000100001100100013110001010110000110110321021292214019240000101010100002000040100140683140683140459140683140683
70204140682105211110020011404431402761399721299872580119401003000310000301003000010000126994667238212017257411405171406531406951309643131777701003020010000300006020010054500001406821406821150201100991004010010000100000100100021110002002110000111110321011292214019240000101010100002000040100140683140683140683140683140683
70204140682105311100011011406671402811399721299852580106401003000610000301003000010000126994667238212017257411406581406821406821309643131777701003020010000301896020010000500001406821404581150201100991004010010000100000100100032110002001110000111100321021292113996940000101010100002000040100140683140683140683140683140459
70204140682105411110021011406671402761399721299852580106401003001610001301003000010000126994667130692017257411406581406821406821311873131777701003020010000300006020010106500001406821406821150201100991004010010000100000100100024110007013412110001111100321021252214018240000101010100002000040100140683140683140685140683140685
70204140682105211011121001406671402761397471299852580106401003000610000301003000010000126997367238212017257401406581406841406841311873131777701003020010000300006020010000500001406821406821150201100991004010010000100000100100013110002001828510002111130321021292214019240000101010100002000040100140683140684140683140683140462
702041406821052110000130011406681402761397471299852580106401003000610000301003000010000126994667238212017257411406591406821404581311873131553701003020010000300006020010000500001406821406865150201100991004010010000100001100100022110001001110000111110321021292214019240000101010100002000040100140683140683140683140683140683
702041404581054110100210114066714027613997212998525801064010030006100003010030000100001269946672382120172574114065814068214068213118731317777010030200100003000060200100005000014068214068211502011009910040100100001000001001000121100010012210000111100321011292214019240000101010100002000040100140683140683140683140683140683
70204140682105111110121011406671402761399721299852580106401003000610000301003000010000126994667238212017257411406581406821406821311873131777701003020010000300006020010000500001406841406841150201100991004010010000100000100100012110001002410000011120321021292214019240000101010100002000040100140459140686140683140683140685
70204140458105410110021011406671402761399721299852580124401163000610000301003000010000126994667238212017257401406581406821406821311873131777701003020010000300006020010000500001404581406821150201100991004010010000100000100100011110003002110000111100321021291214019240000101010100002000040100140683140683140459140683140683
70204140682105211000011011406671402761399721299852580106401003000310000301003000010000126997367238212017257401406581406821406821311873131777701003020010000300006020010000500001406821406821150201100991004010010000100001100100012110001000410000111100321021292213996940000101010100002000040100140683140683140683140683140683

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0260

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140870105310100002101140676140235139975129985258001640010300061000030010300001000012704656723821201725740140658140682140465131197031318067001030020100003000060020100005000014068214068211500211091040010100001000001010001311000200011000011111013234015122402213978140000101010100002000040010140261140261140261140261140261
7002414026010510000000101014002113971613955212956525800104001030003100003001030000100001266662670361320111930014023614003614031213077903131384700103002010000300006002010000500001402601402601150021109104001010000100001101000001100010100100001010000314002120003613978140000101010100002000040010140356140263140265140263140261
700241402601051000000010101402451397161395521295652580013400103000310000300103000010000126666267036132011193001402361402601400361305540313138470010300201000030000603901000050000140260140260115002110910400101000010000010100000110000000010000101000031400212210221397814000010010100002000040010140261140261140261140261140261
7002414003610490000000101014024513971613955212956525800134001030000100003001030000100001266662670361320111930114023614026014026013077703131384700103002010000300006002010000500001402601402601150021109104001010000100000101000001100000000100001010000314002122003613978140000101010100002000040010140261140037140261140037140261
700241402601051000000010101402451397091395521293422580013400103000010000300103000010000126472867036132011193001402361400361402631307770313138470010300201000030000600201000050000140260140260115002110910400101000010000010100000110000000010000101000031400212210331397814000010100100002000040010140261140037140261140261140261
7002414026010510000000001014024513971613955212956525800134001030003100003001030000100001266662670361320079595014023614026014026013077703131384700103002010000300006002010062500001402601402601150021109104001010000100000101000001100000000100001010000314002122002213978140000101010100002000040010140261140261140037140261140261
700241402601049000000010001402451397161395521295652580013400103000310000300103000010000126666267036132011193011402361402601402601307770313138470010300201000030000600201000050000140260140260115002110910400101000010000010100000110000000010000101000031400612210221397814000010100100002000040010140261140261140261140261140261
7002414026010510000000100101402451397161395521295652580013400103000010000300103000010000126666267036132011193011400121402601402601307770313138470010300201000030000600201000050000140260140260215002110910400101000010000010100000110000000010000101000031400212010331395584000001010100002000040010140261140261140261140037140261
7002414026010510000000101014024513948813955212956525800134001030003100003001030000100001266662670361320111930014023614026014026013077703131384700103002010000300006002010000500001402601402601150021109104001010000100000101000001100000000100001010000314002122003213978140000101010100002000040010140261140261140261140261140261
7002414026010510000000101014024513971913976012956525800134001030003100003001030000100001266662670361320111930014001214003614026013077703131384700103002010000300006002010000500001402601402601150021109104001010000100001101000001100000000100001000000314006122203213978140000101010100002000040010140261140261140261140261140261

Test 4: throughput

Count: 8

Code:

  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.d, v1.d }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.6263

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 09 | 0e | 0f | 19 | 1e | 22 | 23 | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd alu (9a) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40020550128389010103710016415013821000262401001001600008000010016000080000500400192192374605009105014450072003242401002008000016000020080000320000501035006711240201100991001008000016000080000110080000008003103280000612535015110616445016311410800003200001005006850104501415012350102
400204501683760000012100005005229002624049410016024780000100160000800005004001931916967050074050108501030773242401002008000016000020080000320000500875010111240201100991001008000016000080000010080000029800320318000061260015110416445006411414800003200001005006850161500685015450169
4002045006737500000000015305005229002624010010016051980000100160000800005004002031926662050084050164501350873242401002008000016000020080000320000500675010111240201100991001008000016000080000010080000029800310328003161310215110416445006411410800003200001005011750068501865016950068
400204501853770000001002761501190991726240632100160176800001001600008000050040020919169670500840500725011703439124010020080000160000200800003200005006750067112402011009910010080000160000800000100800000080031031800006100015110416445010511414800003200001005010750169502745011050139
400204500673750011038100267150094299412624048210016022880000100160000800005004000001933839050083050089501500553812401002008000016000020080000320000501475011911240201100991001008000016000080000010080000029800310328003161260015110416445006411010800003200001005015850109501585010950211
400204500673750000037000431150119099152624044810016038280000100160000800005004001931935407050084050152501620141324240100200800001600002008000032000050108501081124020110099100100800001600008000001008000002980031008003160260015110416445006411414800003200001005011150068501205019650109
40020450113375000003710019205005229946262406031001602408000010016000080000500400192193235905007705018650072067324240100200800001600002008000032000050099500671124020110099100100800001600008000001008000003580032008000001320015110416445006421414800003200001005006850068500685012550068
4002045010837400000000012705012721010026240482100160503800001001600008000050040000019244020501710502445083984515117240100200800001600002008000032000050102501081124020110099100100800001600008000001008000002980000008003161253501511041644501001010800003200001005016150107501805016550158
400204501443760000001001811501202092426240390100160718800001001600008000050040019219248170501000500745022105732424010020080000160000200800003200005010150067112402011009910010080000160000800000100800000298003203480000612629015110316445016511414800003200001005010950109500685010450068
400204501033750000038000431050145090026240299100160331800001001600008000050040020419169670500480502095022600395240100200800001600002008000032000050067501021124020110099100100800001600008000001008000002980000031800006131001511041644500641140800003200001005012050161500995006850175

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.6260

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd alu (9a) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4000255011037510310000038010222450051299002624030510160000800001016000080000504001351911249315500580500785016504735324001020800001600002080000320000500785009111240021109101080000160000800000108000000800320031800326026001503029138058179852945500411981080000320000105007950045500775006750079
400024501023750000001006290101923500640000026240170101601548000010160000800005040000019182263155003905004450080022344240010208000016000020800003200005007850092112400211091010800001600008000001080000029800310035800310126350150343214904317119643305006311171480000320000105010350067500455006750079
40002450066375100001100380107045002929900262401891016023580000101600008000050400204191124901550046050085500440473232400102080000160000208000032000050064501031124002110910108000016000080000010800000298003100080026612535015030291380421798535425009201021080000320000105011950124500455009650090
4000245010637510000000000001324500572900026240193101602698000010160000800005040000019112493155006605004450066067323240010208000016000020800003200005008150075112400211091010800001600008000001080000008003100268000061263501503029138043179854343500411128080000320000105004550096500795008150045
400024500783750000010003700010535006529017026240010101600008000010160000800005040000019176020155006605006650044044345240010208000016000020800003200005006550108112400211091010800001600008000001080000008003100288000001262901503229138044179852844500412881080000320000105013450082500825004550096
4000245007937500000000000001074500630990026240203101600008000010160000800005040000019241023155006205007850080003572400102080000160000208000032000050081500441124002110910108000016000080000010800000080031103180032013135015034291310035179852842500881102080000320000105004550079500795008150096
4000245004437500000000000101193500632090026240193101602698000010160000800005040020319239830155006605004450103029360240010208000016000020800003200005007950071112400211091010800001600008000001080000029800000008003161313501503029158044179852743500410102080000320000105008050080500455007350045
40002450089375100000000380000350051291000262401891016000080000101600008000050400000192374041550059050078500800363232400102080000160000208000032000050078501031124002110910108000016000080000010800000298002500318000061323501503229138044179852944500922881080000320000105008050045500815007950079
4000245006437610000010000000550029291020026240214101601808000010160000800005040014419216994155002505004450078034367240010208000016000020800003200005007850083112400211091010800001600008000001080000035800310032800316126290150322913804417985433550074198080000320000105007450084500995085950068
40002450066375100001100380001644500572990402624021510160168800001316000080000504002041942222415500540501075013802834524001020800001600002080000320000501445008911240021109101080000160000800001108000002980000100800316132001503029158039179854344501111881480000320000105007150078500915004550045