Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (4H)

Test 1: uops

Code:

  ld2r { v0.4h, v1.4h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.006

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
63006284702135112100305136279720001616330082008100020001000500023886061022781285722819231030001000200010002000281972843911610011000100001001231003013100021310139331002973603449063199023300381420605327802143361243913077100020002830728236285212826828650
6300428379212211110050524528145001161133008200210002000100050002391204182280428121283593103000100020001000200028414285531161001100010000100222100201210000131013687945972373214055194843448381425565427836143131232413043100020002821728306282742827227831
6300428353214511110050506428001011164573008200810002000100050002387607002278628124281333103000100020001000200028618286122161001100010000100333100301110000331113833965970823479050195133328381219554927886149321223813029100020002811528130281672846328254
6300428614211511110050489028136000161833008200210002000100050002388604082276728421282623103000100020001000200028354283441161001100010000100130100302410003131214012985072653450156197473465381820575428048141181265313391100020002826028193281392831628485
6300428552214311010041484727998011166003008201010002000100050002381803102276128121280753103000100020001000200028337286151161001100010000100323100200310022131013297989770963416048194133335381827645227979147801257013366100020002831728136283542832028229
6300428300211710100010506628094000162143008200810002000100050002381804072281628173283043103003100020001000200028196282261161001100010000100320100101110003131114099962672583262151197423448381915595827959147291268513504100020002831128195281942843028206
63004285712115101100905150279860101634530022008100020001000500023886060022797283702828231030001000200010002000280742818411610011000100001002201003128100003313140561008971583348156195743399382016595827809137991246613526100020002823128500284752846928422
63004282132114111000905046280500101636430102006100020001000500023906030822785279192820631030001000200010002000281032819311610011000100001003221002122100023311139401031271813438153193303214381126675627904138871315512928100020002833428274281592831428080
63004281982125101100504942280730001644530062006100020001000500023896041822816284202829831030001000200010002000285382844411610011000100001002431003011100121310139481007070463421149197113161381615576127854142361244512703100020002817328394281172858728167
63004284642144000000015221281130101633430062000100020001000500023880040822768281342823031030001000200010002000282122813311610011000100001000031001101100121300139911021269683436160195053407381715545727868140081233412665100020002819728055281012862328495

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.4h, v1.4h }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514005710491010100002101140042139652139350129363258010640100300061000030100300001000012642106693878200827450140033014005714005713056503131155701003020010000300006020010000300001400351400511150201100991004010010000100000100100022110001011100001111100321021282213956940000101010100002000040100140058140058140058140058140058
702041400571049101010000210114004213965213935012936425801064010730003100013010030000100001267569670322120199035114002701400511400511305590313114770100302001000030000602001000030000140051140035115020110099100401001000010000010010002111000301110000110110032102128221395694000001010100002000040100140052140052140036140036140053
7020414005110490000000000000140020139561139325129357258010040100300001000030100300001000012642106693091200803611140033014005714005713056503131153701003020010000300006020010000300001400571400571150201100991004010010000100000100100033110002001100001111100321021282213956940000101010100002000040100140058140058140058140058140058
70204140057104910101000021011400421394281393501293632580106401003000610000301003000010000126430466938782008274501400330140057140057130565031311537010030200100003000060200100003000014004114005711502011009910040100100001000001001000121100010111000011011003210212821139569400000010100002000040100140058140042140058140042140058
7020414005710491001001005001140042139652139350129363258010640100300031000030100300001000012642106693878200803610140033014005714005713056503131153701003020010000300006020010000300001400571400571150201100991004010010000100000100100022010004031100001010000321021282213956940000101013100002000040100140055140052140052140052140052
702041400351049000001100010014002013940413934412935725801034010030000100003010030000100001264210669387820082745114003301400571400571305490313115370100302001000030000602001000030000140057140057115020110099100401001000010000010010000001000000310000101000032102128221395444000001010100002000040100140682140617140305140597140542
702041406571053000000000000014003613956113934412934125801034010030006100003024230000100001264210671059320087131014003331400611400571305650313168071798311641026630956617821042231108140451140057115020110099100401001000010000010010003411000243165291000011111003210212822139571400000100100002000040100140053140052140052140036140055
70204140035104900000000030000140036139561139344129357258010340100300031000030100300001000012643046693878200803610140033014005714005713056503131153701003020010000300006020010000300001400571400571150202100991004010010000100000100100000110000100100001000000321021282213956440000101010100002000040100140052140052140052140052140052
70204140035104910000000015100140020139561139344129357258010340100300061000030100300001000012643046694022200827450140033014005714005713056503131153701003020010000300006020010000300001400571400571150201100991004010010000100000100100012110001001100000111000321021282213955440000101010100002000040100140135140066140052140052140036
7020414005110490000000001000140036139561139325129451258010340100300031000030100300001000012643636693584200818430140027014005114005113055903131126701003020010000300006020010000300001400351400511150201100991004010010000100001100100011010002011100001111000321021282213955440000101010100002000040100140058140042140058140042140058

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | 09 | 0e | 0f | 1e | 22 | 23 | 24 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7002514004710490100110014003213949113933812935325800134001030000100003001030000100001264771669279120081263114001101400471401271305683131175700103002010000300006002010000300001400501400351150021109104001010000100000101000011000000010000101031409120181513957240000960100002000040010140048140048140048140048140048
70024140035104900017000140035139491139325129356258001340010300031000030010300001000012647486693535200794511140026014003514012213056831311617001030020100003000060398100003000014014914005511500211091040010100001000001010000110000000100000010314014121171813955740000669100002000040010140036140048140051140048140036
70024140050104900001100140020139491139343129356258001340010300031000030010300001000012647716693535200812631140026014005014005313057131311617001030178100533079460020100003000014005014004711500211091040010100001000001010000110000000100000010314015121171513955740000966100002000040010140036140051140051140048140048
7002414005010480000110014002313944613933812935325800104001030003100003001030000100001264719669338820081263114002601400351401151305683131175700103002010000300006002010000300001400501400351150021109104001010000100000101000001000000010000101031401012117713957240000969100002000040010140036140036140036140036140051
70024140035104900000100140035139446139325129356258001040010300031000030010300001000012647486692791200812631140026014005014005313057131311617001030020100003000060020100003000014012514005711500211091040010100001000001010000010000100100001010314016120151713956940000699100002000040010140051140051140036140051140051
70024140050104900001000140035139487139343129341258001040010300031000030010300001000012647196693535200812631140011014005014005613056931311757001030020100003000060020100003000014003514004711500211091040010100001000001010000110000000100001010314018120161413956940000999100002000040010140051140051140051140051140051
70024140035104900002510014002013948713932512935625800104001030003100003001030000100001264719669279120081697114001101400501400961305713131175700103002010000300006002010000300001400501400351150021109104001010000100000101000011000000010002001031401512171713957240000669100002000040010140051140051140051140051140051
70024140050104900006000140035139491139325129356258001340010300091000030010300001000012647486693535200812631140011014005014011013056531311757001030020100003000060020100003000014004714004711500211091040010100001000001010000110000000100001010314016120171713957240000699100002000040010140051140051140048140036140051
7002414005010490000100014002013949113934312934125800134001030003100003001030000100001264719669353520081263114002301400351400531305793131161700103002010000300006002010000300001400501400471150021109104001010000100000101000001000000010000100031401612191513957240000900100002000040010140036140051140051140048140036
700241400351049000033100140035139491139343129356258001340010300031000030010300001000012647716693535200812631140011014003514005613055431311617001030020100003000060020100003000014005014004711500211091040010100001000001010000110000000100001010314017120171713968540000999100002000040010140051140051140048140036140051

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.4h, v1.4h }, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0260

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514087810541110100701011406671402761399721299852580106401003000610000301003000010000126994667238212017257401406581406821406821311873131777701003020010000300006020010000300001406821406821150201100991004010010000100000100100011110001021100001111000321011292213996940000101010100002000040100140261140261140261140261140261
7020414026010500001000000001402471397701395521295652580103401003000310000301003000010000126623267036132011193001402361402601402601307673131355701003020010000300006020010000300001402601402601150201100991004010010000100000100100000010000100100001010000321021291113977240000101010100002000040100140261140037140261140037140261
702041402601051000000010100140021139770139552129565258010340100300031000030100300001000012662326703613201119301140236140264140260130767313112770100302001000030000602001000030000140260140260115020110099100401001000010000010010000011000000010000101000032102133221397724000010010100002000040100140261140037140261140037140261
702041402601048000000010100140021139770139585129568258010340100300031000030100300001000012662326703613200795950140334140260140260130777313135570100302001000030000602001000030000140036140036115020110099100401001000010000010010000011000000010000101000032102129111397724000010010100002000040100140261140261140261140261140261
7020414026010510000000001001400211397701393261295652580103401003000310000301003000010000126872667037092011193011400121402601402601307673131355701003020010000300006020010000300001402601402601150201100991004010010000100000100100000110000000100001010000321021291213977240000101013100002000040100140261140261140261140261140261
70205140260105100000003601001402451397701395521295652580103401003000310000301003000010000126623267036132011193011402361402601402601305323131355701003020010000300006020010000300001402601402601150201100991004010010000100000100100000110000000100001010000321021332213977240000101010100002000040100140261140261140261140037140037
7020414026010510000000160100140245139770139552129565258010340100300031000030100300001000012662326703613200795951140236140265140260130767313135570100302001000030000602001000030000140260140260115020110099100401001000010000010010000001000000010000001000032102129121397724000010100100002000040100140261140262140037140037140261
7020414003610510000000101001402451397701393261293422580103401003000010000301003000010000126623266928392011193001402361402601400361305323131355701003020010000300006020010000300001402601400361150202100991004010010000100000100100000110000103100001010000321021292213977240000101010100002000040100140261140037140261140263140261
702041402601050000000010100140021139770139554129565258010040100300031000030100300001000012662326703613201119300140236140260140260130767313135570100302001000030000602001000030000140036140260115020110099100401001000010000110010000011000000010000101000032101129111395454000001010100002000040100140261140037140261140262140261
70204140260105100000001010014024513940513955612956725801004010030003100003010030000100001264319670361320111930014023614026014026013076731313557010030200100003000060200100003000014026014026011502011009910040100100001000001001000001100000001000000100003210212922139772400000100100002000040100140037140261140261140037140261

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0464

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 24 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cd | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
700251403821052001101404501394881397531297682580010400103000310000300103000010000127266967163152010136501404400140592140499130767371315797068630020100633000060020100003000014046714046711500211091040010100001000011010000110000100100061000314001712113913998540000666100002000040010140465140037140456140465140465
700241404641052001001404491398621393261297682580013400103000010002300103000010000126853266947112014249311404400140464140464130776313116270010300201000030000600201000030000140464140464115002110910400101000010000010100001100000001000000003140091218713998440000666100002000040010140037140465140468140469140465
7002414046410520010014044913986213975612976825800134001030000100003001030000100001268532671571820081899114001201400361404671309803131589700103002010000300006002010000300001400361404641150021109104001010000100000101000011000000010000110031400912181014003940000060100002000040010140037140037140465140037140465
7002414046410520011014044913986213932612976825800134001030000100003001030000100001268532669461520083627114001201404641400361309803131589700103002010000300006002010000300001404641404641150021109104001010000100000101000011000002010000110031400713512913998440000066100002000040010140465140465140465140037140465
7002414003610520000014044913986213932612976825800134001030003100003001030000100001268532669595920141629114001201404641404641305543131589700103002010000300006002010000300001404641404641150021109104001010000100000101000011000000010000110031400812191113998440000066100002000040010140037140465140465140465140465
700241404641052006101404491398621397531293422580010400103000310000300103000010000126472867151902014134111404400140464140464130554313116270010300201000030000600201000030000140036140464115002110910400101000010000010100001100000001000010003140081208713998440000666100002000040010140037140465140465140465140465
70024140464105211110140449139862139753129768258001040010300031000030010300001000012690636713702201411971140440014003614046413098031311627001030020100003000060020100003000014046414046411500211091040010100001000001010000010000001510000110031400812112913955840000666100002000040010140465140260140037140465140465
70024140036105200110140021139488139753129342258001340010300001000030010300001000012685326695911201414850140440014003614046413098031315897001030020100003000060020100003000014046414046411500211091040010100001000001010000110000000100001000314001212111713955840000666100002000040010140465140039140465140037140465
70024140464105200100140021139862139753129768258001040010300031000030010300001000012647286716582201411970140440014003614046413098031315897001030020100003000060020100003000014046414003611500211091040010100001000011010000010000000100001100314007121101013998440000666100002000040010140507140037140037140465140465
7002414046410520061014002113986213955112976825800134001030003100003001030000100001269015669307920141197014002801404641404641309803131162700103002010000300006002010000300001404641404641150021109104001010000100000101000011000010010000101031400712017713998440000606100002000040010140037140465140465140037140038

Test 4: throughput

Count: 8

Code:

  ld2r { v0.4h, v1.4h }, [x6]
  ld2r { v0.4h, v1.4h }, [x6]
  ld2r { v0.4h, v1.4h }, [x6]
  ld2r { v0.4h, v1.4h }, [x6]
  ld2r { v0.4h, v1.4h }, [x6]
  ld2r { v0.4h, v1.4h }, [x6]
  ld2r { v0.4h, v1.4h }, [x6]
  ld2r { v0.4h, v1.4h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2402054007430001101004004401010252401071001600708000810016002080014500424347456014604004040059400419975610008240134200800141600262008001416002640059400591180201100991001008000080000110080000370800310002780031610371115116001600400381010800001600001004006040060400604006040060
240204400593000003700040044010102524017010016007080000100160000800005004313594305709140040400644005999733100172401002008000016000020080000160000400414005911802011009910010080000800000100800003708003116416380031613137000511001161140061100800001600001004006040060400424006040042
240204400593000003710040026010102524017010016007080000100160000800005004191541845094040033400414005999733100172401002008000016000020080000160000400414004111802011009910010080000800000100800003708000000085800316131370005110011611400561010800001600001004006040060400604006040060
240204400523000004110040044010025240170100160070800001001600008000050041905318450941400404004140059997331001724010020080000160000200800001600004004140059118020110099100100800008000001008000037080031000163800316100000511001161140056010800001600001004004240065400604006040060
24020440059300000010040044010102524017010016007080000100160000800005004184644560014140040400594006499733100172401002008000016000020080000160000400594004111802011009910010080000800000100800003708000000019080031613137000511001161140056010800001600001004006040065400424006540060
24020440059300010410004004401002524010010016007080000100160000800005004184154560014040022400594005999733100172401002008000016000020080000160000400644005911802011009910010080000800000100800003708003100038003161037000511001161140056010800001600001004006040060400604006040060
240204400593001004010040044010102524017010016007080000100160000800005124184394560014040040400414005999733100172401002008000016000020080000160000400594005911802011009910010080000800000100800003708003100011880031013137000511001161140056010800001600001004006040060400604006040060
24020440059300000370004002601002524017010016008280000100160000800005004186954560014040040400594005999733999924010020080000160000200800001600004005940059118020110099100100800008000001008000037080031000080031013100005110011611400561014800001600001004006040042400604004240060
2402044005929910037000400440100252401001001600008000010016000080000500418446456001404004040059400599973310017240100200800001600002008000016000040059400591180201100991001008000080000010080000008003100052800316031370005110011611400561014800001600001004004240060400604006040159
24020440059300000371004004401010252401701001600008000010016000080000500418413456001404002240059400599973310022240100200800001600002008000016000040059400591180201100991001008000080000010080000008003501064800316131370005110011611400561010800001600001004004240060400424006040060

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2400254006530010100631340050018181252400921016002480000101600008000050420067544008001400464006540065999603100452400102080000160000208000016000040065400471180021109101080000800000108001617418005310160800376153016105020716574006299080000160000104006640066400664006640066
2400244006530011010631340050201812524009210160082800001016000080000504162965440080014002840065400659996031004524001020800001600002080000160000400474006511800211091010800008000001080017174180052101798003761524116005020816664006299080000160000104006640066400664006640048
24002440065300101006213400500181812524008810160080800001016000080000684159195440080014004640065400659996031004524001020800001600002080000160000400654006511800211091010800008000001080016164180182001598002661534116005020816654006299080000160000104004840066400664006640066
24002440065300111006413400502181812524008810160082800001016000080000504157895440080014004640065400659996031004524001020800001600002080000160000400654006511800211091010800008000001080016174180053102768003661534116105020716774006299080000160000104006640048400484006640066
24002440065300110006313400502181812524009210160082800001016000080000504186665440080014004640065400479996031004524001020800001600002080000160000400654004711800211091010800008000001080017184180053100778003761534116105020816654006290080000160000104006640048400664006640066
2400244006530011100631340050118181252400901016008280000101600008000050415583544008001400464006540065999603100272400102080000160000208000016000040065400651180021109101080000800000108001816080054001878003761534116205020816564006299080000160000104006640048400484006640066
2400244006530011100201340050218012524003410160078800001016000080000504159395440082004004640065400659996031004524001020800001600002080000160000400474006511800221091010800008000011080017174180016001668003661534116005020516564006209080000160000104006640066400664006640066
24002440065300110006213400501181802524009210160082800001016000080000564154825440082014004640065400659996031004524001020800001600002080000160000400654006511800211091010800008000001080018174180053121838003861524116105020616664006299080000160000104006640066400484006640066
2400244006530011100621240050018012524009210160078800001016000080000504162425440080014004640065400479996031004524001020800001600002080000160000400654004711800211091010800008000001080016164180053101658000061534116005020816564004499080000160000104006640066400484006640048
2400244006530011101631340050218181252400921016008280000101600008000050415519544008201400464004740065999603100452400102080000160000208000016000040065400651180021109101080000800000108001617080053101418003760534116105020716674006299080000160000104006640066400664006640066