Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (1D)

Test 1: uops

Code:

  ld2r { v0.1d, v1.1d }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.008

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.008

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
63005287192142101111004105138279881116238300820081000200010005000238863052271928203281833103000100020001000200028055279871161001100010000100213100413110012231114044101057283330445195303465381310334127866146711244713099100020002822128333281502830928216
63004283812100101001004105218279261116003300820081000200010005000238694182275628090281583103000100020001000200028164282691161001100010000100333100502210012231014071105137319352728195803399381613373727780143101243612916100020002839728192281292810728117
63004283562110101001004105301278660016298300620081000200010005000238565102276228204282903103000100020001000200028099281681161001100010000100333100302210002221213694104307291341237194963318381413403527837138181259013191100020002857428378281802818428242
63004280722130101001004105251279831016232300820081000200010005000238863092277928142281973103000100020001000200028318281351161001100010000100223100311210012231213559103577384351334194773430381917393927812142741243113527100020002822228245282762810528025
63004281242090101001004105145280270116221300820081000200010005000238964182278228095283433103000100020001000200028111281741161001100010000100433100301110002231013755104247037355737195733389382012413727814141501284913536100020002834428415283352830928497
63004282932110100001004105041280011016087300620081000200010005000238884082272828123285713103003100020001000200028098282551161001100010000100133100322110002231213618101477247347336194983341381611394027896140241257612882100020002848928259281962841428224
6300428272211010100100210504627990101621230082008100020001000500023888400227642799628207310300010002000100020002802628207116100110001000010013310020221001123121370899977259348441193483400382214423827804148391240714103100020002833728314283412831328105
63004281492110101001003105115279540116023300820081000200010005000238903192275327995281233103000100020001000200028293280681161001100010000100332100500210012211214257103187205335436194533458381913343827879141411254313022100020002834528290281342831828083
63004281912141101000004105127278851115904300820081000200010005000238720082280828024280573103000100020001000200028417283331161001100010000100323100202110011231214055101457201326732194903430381210413627848149051292012921100020002859628325281912822928283
63004280482111101001004005204281160116188300820041000200010005000238864182281428005280403103000100020001000200028297281941161001100010001100333100302110012231213792104057252323138194783514381715434127796142021230613381100020002845428316282462846328150

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.1d, v1.1d }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
702051400351049000000000001400201394311393381293412580100401003000010000301003000010000126432866927912008126311400110140047140047130531031311187010030200100003000060200100003000014004714004711502011009910040100100001000011001000001100000001000011000324621282113954440000900100002000040100140054140051140052140051140051
702041400501049000000000001400351395341393251293412580103401003000310000301003000010000126434266927912007945101400270140050140050130558031311267010030200100003000060200100003000014005014004711502021009910040100100001000011001000001100010001000011000321021531213956340000000100002000040100140051140036140051140036140051
7020414005310490000001301001400351394041393251293412580103401003000010000301003000010000126431066935352008126311400230140047140052130566031311467010030200100003000060200100003000014004714005111502011009910040100100001000011001000001100000001000011000321021282213954440000969100002000040100140070140055140051140051140060
7020414005010490000001010014008213953413934312935625801004010030003100003010030000100001264319669367920081697114002601400351400351305580313112670100302001000030000602001000030000140050140035115020110099100401001000010000110010000011000055031000011000321021281213955940000909100002000040100140051140048140048140048140051
702041400351049000100000001400201395341393251293562580103401003000010000301003000010000126431966935352007945111400260140050140050130558031311267010030200100003000060200100003000014004714004711502011009910040100100001000011001000001100000041000000000321011282213954440000009100002000040100140056140051140051140049140051
702041400501049001100000001400201394311393381293532580103401003000010000301003000010000126431066935352008169711400260140035140050130531031311267010030200100003000060200100003000014004714004711502011009910040100100001000001001000001100000001000011001321011281213956340000999100002000040100140140140061140048140048140051
702041400351049000000600001400351394311393381293532580103401003000310000301003000010000126432866927912007945101400230140047140047130543031311267046530200100003000060200100003000014005014004711502011009910040100100001000001001000001100000001000011000321021532213956340000660100002000040100140051140051140036140051140036
702041400501049000000100001400201395341393251293562580103401003000310000301003000010000126434266927912008169711400230140050140047130558031311467010030200100003019560200100003000014005014004711502011009910040100100001000001001000001100000001000001000321011532213956040000666100002000040100140048140048140048140036140048
702041400471048000000101001400321394041393381293532580103401003000310000301003016710000126432866933882007945111400230140047140035130531031311267010030200100003000060200100003000014005014004211502011009910040100100001000011001002541100292001000611000370143212213954440166000100002000040100140137140051140051140037140051
7020414005010491000001300001400351394341393511293562580103401003000010000301003000010000126434266935352007945111400260140050140035130558031311467010030200100003000060200100003000014005014005511502011009910040100100001000001001000001100000001000001000321021282213954440000090100002000040100140051140036140048140048140051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0054

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140051104900000000100001400391395641393441293602580010400103000310000300103000010000126481966937312008184300140030140054140054130625313117670010300201000030000600201000030000140054140051115002110910400101000010000010100002110000000310001110031400412023139576400000018100002000040010140036140057140433140052140036
7002414003510490000110010100140039139584139346129360258001040010300031000530010300001000012648196692791200794510014003014005414003513057231311797001030020100003000060020100003000014005414005111500211091040010100001000001010000011000000015100001100314002120221395764000013010100002000040010140052140055140055140055140036
700241400541049010010001000014003913956413934612936325800104001030000100003001030000100001264819669373120082304001400301400511400521305533131161700103002010000300006002010000300001400541400351150021109104001010000100000101000041100000003100000100314002121241395764000013010100002000040010140036140036140036140055140055
70024140051104900011000120000140039139564139325129360258001040010300001000030010300001000012647196692791200818430014002714005414003513057231311797001030020100003000060020100003000014005414005111500211091040010100001000001010000211000001001000011003140021212213965340035131310100002000040010140441140279140265140233140356
7002414005410490000000045010014003913953613934612936025800134001030003100003001030590100001264819669279120082131011400301402451400581305533131161700103002010212300006002010000300001400541400545150021109104001010000100000101000001100000003100001100314002121221395764000001310100002000040010140052140036140036140055140055
700241400351048000000001010014003913947413934612936025800264001030003100003001030000100001264810669373120089300001400111400511400511305693131176700103002010000300006002010000300001400351400521150021109104001010000100000101000001100000100100001100314002121221395774000001010100002000040010140055140055140052140036140055
7002414003510490000000010000140039139487139346129343258001340010300031000030010300001000012648196693731200896470114001114005414015413057231311797001030020100003000060020100533000014003514003511500211091040010100001000001010000011000001001000010003140021212213957640000131310100002000040010140053140036140036140055140052
700241400541049000100004088000140036139507139344129341578001040010300001000030010300001000012647196693584200818430014002714005114005413057231311797001030020100003016060020100003000014005414003511500211091040010100001000011010000011000000001000011003140021212213957640000101310100002000040010140052140055140055140055140055
70024140035104900000000128800014013613950713932512936025800134001930003100013001030000100001264783669373120079451001400341400791400521305723131161700103002010000300006002010000300001400541401441150021109104001010000100000101000001100000000100001100314002121221395734000001310100002000040010140036140052140055140036140036
700241400351049000000000000014003913950713934612936025800134001030003100003001030000100001264819669373120082304001400111400541400541305723131179700103002010000300006002010000300001400541400511150021109104001010000100000101000001100000000100001100314002121231396524000010010100002000040010140055140055140055140055140036

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.1d, v1.1d }, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0260

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | 0e | 0f | 1e | 22 | 23 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514005810510011325001408721404391395521295652580103401003000310000302493000010000126623266940392011221801408631400451402671314203131982701003020010000300006020010000300001402601402601150201100991004010010000100001100100001100000010000100032103133331403964000010100100002000040100140888140037140888140037140888
7020414088710510010037101400211394051401751301892580100401003000310000301003000010000126431967336312020200401408631408871400361307673131355701003020010000300006020010000300001400361400361150201100991004010010000100000100100000100000010000010032102129331397724000001313100002000040100140888140888140037140888140888
70204140934104900000120014087214043913932613018925801034010030003100003010030000100001272055669283920079595014086314088714003613082631311277010030200100003000060200100003000014088714003611502011009910040100100001000001001000011000000100001100321021733213954540000000100002000040100140037140261140037140261140888
70204140260105100100311014024513940814017513018925801004010030003100003010030000100001272055673363120111930114086314088714088713139131319827010030200100003000060200100003000014026014026011502011009910040100100001000001001000001000003610000010032103129331397724000001313100002000040100140888140888140037140888140888
702041408871049000001101402451398051393261295652580103401003000310000301003000010000126623266928392011193001400121402601402601305323131982701003020010000300006020010000300001408871403081150201100991004010010000100001100100001100000010000000032103129331403964000010010100002000040100140261140037140037140037140261
7020414088710500000013101408721404391401751301892580100401003000310000301003000010000126431967336312011193011408631400361408871307673131982701003020010000300006020010000300001408891402601150201100991004010010000100000100100000100000010000010032103129331397724000013130100002000040100140888140888140888140261140888
702041400361051000001001408721394051401751293422580100401003000310000301003000010000127209166928392007959511408631408871400361307673131127701003020010000300006020010000300001400361402601150201100991004010010000100000100100000100000310000100032102129331395454000001010100002000040100140037140261140261140037140037
702041400361051000001101402451397701395521295652580100401003000310000301003000010000126623267036132011193011400121400361400361307673131355701003020010000300006020010000300001400361400361150201100991004010010000100000100100001100001010000110032103133331395454000013013100002000040100140888140888140888140037140888
702041402601050000000001408721397701395521295652580103401003000310000301003000010000126431967036132011193011402361402601402601307673131127701003020010000300006020010000300001408871402601150201100991004010010000100000100100001100000010000100032103129331395454000013013100002000040100140888140037140037140888140888
7020414026010510010013101400211404391401771293432580103401003000310000301003000010000127207367336792011221811408631400361400361307673131982701003020010000300006051410159300001407351402603150201100991004010010000100000100100051100001010000010032103129231403964000013013100002000040100140040140037140037140888140888

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0260

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
700251404481051001000001100014024513971713955212956525800134001030003100003001030000100001266662670361320111930014023601402681402621307770313138470010300201000030000600201000030000140260140260115002110910400101000010000110100000110000100100001011000314041224314020140000101010100002000040010140261140261140037140261140343
700241402611050000000001100014024513948813955212934225800134001030003100003001030000100001264728670361320079595114023601402601402601307770313138470010300201000030000600201006230000140262140260115002110910400101000010000010100000110000000100001010000314031224313955840000101010100002000040010140037140261140261140261140037
700241402611051000000001000014066713982713974712998525800164001030006100003001030000100001272485672382120172574114023601402611402601305540313138470010300201000030000600201000030000140036140039115002110910400101000010000010100000110000000100001011000314031222414020140000101010100002000040010140261140037140261140261140037
700241402601050000000001100014024513971613955212956525800134001030003100003001030000100001266716670361320111930114023601406841406821311970313180670010300201000030000600201000030000140260140260115002110910400101000010000010100000010000009100000011400314031212413997840000101010100002000040010140459140683140459140459140683
700241404581054100000006000014024513971613955212956525800104001030003100003016030000100001266662670361320111930114023601406821404611311990313180670010300201000030000600201000030000140260140260115002110910400101000010000010100000010000000100001010000314031224313978140000101010100002000040010140261140261140261140037140037
700251402601050000000001100014024513971613955212956525800134001030003100003001030000100001266662670361320111930014023601402601402601307770313138470010300201000030000600201000030000140260140260115002110910400101000010000010100000110000000100001011100314021223214020140000101010100002000040010140683140683140683140683140683
700241406821053100100001000014024513971613932612956525800134001030000100003001030000100001266662670361320111930014001201404651406821309740313180670010300201000030000600201000030000140682140682115002110910400101000010000010100033010003610100001010000314021222313955840000101010100002000040010140261140261140261140261140261
700241402601049000000001100014024513971613932612956525800134001030003100003001030000100001264728669283920079595114001201402631401221307770313138470010300201000030000600201000030000140260140260115002110910400101000010000010100000010000000100001011200314041224313997840000101010100002000040010140683140459140683140459140683
70024140682105311000000100001402451394881395521295652580013400103000310000300103000010000126667166931272011193011402360140260140036130777031313847001030020100003000060020100003000014026014026011500211091040010100001000001010000011000000010000101000031403122241397814001301010100002000040010140261140261140037140261140558
7002414039210490000000010311000140670140235139972129985258001340010300061000030010300001000012685786723917201402951140434014026014026013077703131384700103002010000300006002010000300001402601402601150021109104001010000100000101000001100000001000010013003140512223140201400000100100002000040010140261140261140261140215140261

Test 4: throughput

Count: 8

Code:

  ld2r { v0.1d, v1.1d }, [x6]
  ld2r { v0.1d, v1.1d }, [x6]
  ld2r { v0.1d, v1.1d }, [x6]
  ld2r { v0.1d, v1.1d }, [x6]
  ld2r { v0.1d, v1.1d }, [x6]
  ld2r { v0.1d, v1.1d }, [x6]
  ld2r { v0.1d, v1.1d }, [x6]
  ld2r { v0.1d, v1.1d }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
240205400633000037010140026100025240170100160070800001001600008000050041795445600141040022400644006499733999924010020080000160000200800001600004005940059118020110099100100800008000001008000037800310318000061313751100216224005600800001600001004004240060400604006040060
24020440069300003700004004410100252401701001600708000010016000080000500419911188385000400404005940059997339999240100200800001600002008000016000040059400591180201100991001008000080000010080000378000013180031610375110021622400561410800001600001004004240042400604006040042
24020440059300004100004004481002524017010016007080000100160000800005004180504560014004004040059400599973310003240100200800001600002008000016000040059400591180201100991001008000080000110080000378003504680031600375110021622400561010800001600001004006540042400604006040060
2402044005930000000014002610002524017010016007080000100160000800005004184984560016004004040041400649973399992401002008000016000020080000160000400594005911802011009910010080000800000100800003780031031800316131375110021622400561010800001600001004006040042400604006040060
240204400593001137000040044010025240100100160082800001001600008000050041985045600140040022400594006499733100222401002008000016000020080000160000400414005911802011009910010080000800000100800003780031031800310131375110021622400561410800001600001004020340060400424006040065
24020440059300003701004004901002524010010016007080000100160000800005004205715439994004004040041400419973310017240100200800001600002008000016000040059400411180201100991001008000080000110080000378003103180000013137511002162240038010800001600001004004240060400604006540065
2402044005930000000014004410802524010010016000080000100160000800005004219991920972004004040059400599973399992401002008000016000020080000160000400594004111802011009910010080000800000100800000800000318000061005110021622400381010800001600001004006540042400604004240060
2402044005930000370100400491010025240170100160000800001001600008000050042231745600140040040400644005999733999924010020080000160000200800001600004005940059118020110099100100800008000001008000041800000318003101310511002162240061140800001600001004004240060400604006040042
2402044005930000370100400260002524017010016008280000100160000800005004199421886788004004040041400419973310022240100200800001600002008000016000040041400591180201100991001008000080000010080000378003103180035613137511002162340038010800001600001004006040060400424006040042
24020440059300110010040044101002524010010016007080000100160000800005004179504560014004002240041400419973310017240100200800001600002008000016000040059400411180201100991001008000080000010080000378000003180031613137511002162240061100800001600001004006040060400424004240042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 09 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24002540055300001037000400401111252400641016005480000101600008013550419236345726404011940044400559996310035240010208000016000020800001600004005540055118002110910108000080000110800003008000002480024612430502041600434005206680000160000104004240056400604005640221
2400244005530000113000040026111125240064101600548000010160000800005041875339200180400364005540059999631002224001020800001600002080000160000400554005511800211091010800008000001080000300800001248000060240502041600424005600680000160000104005640056400424005640056
2400244005530000003000040026111125240010101600548000010160000800005041875739200140400364005540059999631002124001020800001600002080000160000400554005511800211091010800008000001080000300800240248002400030502031600554005206680000160000104006040042400424006040042
24002440055300000030100400401111252400641016005480000101600008000050420942392001404003640055400439996310035240010208000016000020800001600004005540055118002110910108000080000010800003008002402480024612430502041600444005206680000160000104005640042400564005640056
24002440055300001030100400401111252400641016007080000101600008000050418496392001414003640041400559996310035240010208000016000020800001600004004140055118002110910108000080000010800003008002402480024012430502051600354005200080000160000104005640056400564005640056
240024400553000000301004004011025240010101600548000010160000800005041865519301860400364005540055999631002124001020800001600002080000160000400554004111800211091010800008000001080000300800240248000061030502051600554005206080000160000104005640042400564005640042
240024400553000110010040040111125240064101600548000010160000800005041955340000020400364005540059999631003624001020800001600002080000160000400554004111800211091010800008000001080000008002412480000612430502031600324005200680000160000104005640056400564005640060
240024400553000000300004004000252400101016005480000101600008000050422010392001614003640055400559996310021240010208000016000020800001600004004140055118002110910108000080000010800003008003102480024602430502041610434005606680000160000104005640057400564005640056
24002440055300000030100400401111252400101016007080000101600008000050418992392001404002240055400559996310035240010208000016000020800001600004004140055118002110910108000080000010800003008002412480024612430502041600444005206080000160000104005640042400564004240042
240024400553000111000040040011252400641016007080000101600008000050418582392001404003640055400559996310035240010208000016000020800001600004005540055118002110910108000080000010800003008002402480024013137502041600344005206080000160000104004240056400424005640056