Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (8B)

Test 1: uops

Code:

  ld2r { v0.8b, v1.8b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.004

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.004

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
63005282092121611201200052542800001606130002004100020001000500023804502278128157280533103000100020001000200028080279601161001100010000100021001001000202140551053573213498648194193377383316404127904139051244212895100020002817328511285422812828090
63004280162121401611201049222788001659130042000100020001000500023865102281928028282623103000100020001000200028445285591161001100010000100021000001000202139821054971943520443195913414382410384227824140161306712585100020002811228116281732830028291
63004280722101601111000051982794301593130042004100020001000500023818202280728073281343103000100020001000200028505280721161001100010000100001000001000202135851047973533562943198723411382614394027786152441236813512100020002803728134283012807728133
630042846321015012113801049582783211603630062000100020001000500023852202272628039286003103000100020001000200028398279761161001100010000100021000001000102142311050473083448738195323502382614433828106136541211012787100020002811328550282032804728635
6300427972210140600288005005278970159623006200410002000100050002384840227842805428486310300010002000100020002828228050116100110001000010002100003100020214205977173073308445194003425382515454027890141221261113230100020002799028601280182816728108
6300428069210130900200052252786001646330042004100020001000500023802302272728035281043103000100020001000200027998281951161001100010000100021000001000202141051027572293479541195303529382711383927778139121222412960100020002810128124283202824828248
63004281122111201200000053002775001601330062004100020001000500023864602277428166280893103000100020001000200028275281291161001100010000100021000041000200138431059672693123738193673431382515434427826142821204212871100020002804128196281112800228257
6300428035214160160120005215279790159653004200410002000100050002388220227942795728136310300010002000100020002863128099116100110001000010002100000100000213095101687358347154019364341538299394327828135791215912707100020002817028157287182809428571
6300428536214160160020005188280180161953002200410002000100050002380820228062818728199310300010002000100020002809428190116100110001000010002100010100010013895103927327353343419456335238298403727808151921242612766100020002813528568281422865028086
6300428109211130160000104911279250163993006200010002000100050002386720228242808228027310300010002000100020002812228164116100110001000010002100010100020214126988972863506641196243138383113393928023146971319614021100020002823128341282742814428101

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.8b, v1.8b }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0053

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514005310491001110020100014068713945013934512935925801064010030006100003010030000100001264288669309120082149114002901400561400561305643131152701003020010000300006020010000300001400561400411150201100991004010010000100000100100022010001001100000111103210011281113957340000909100002000040100140057140057140057140062140152
70204140041104910001100340100114004113945013934812935925801064010030006100003010030000100001264288669368220082149014001701400561400411305493131137701003020010000300006020010000300001400411400411150201100991004010010000100000100100022110002011100001111003210011281013956840000969100002000040100140057140042140057140057140057
7020414005610491100110020000014004113963813933212936225801034010030006100003010030000100001264288669382920082149114003201400411400561305493131264701003020010000300006020010000300001400561400411150201100991004010010000100001100100013110002001100001101103248011281113955440000099100002000040100140057140057140073140042140054
7020414005610491101110020100014004113945013934812936225801034010030006100003010030000100001264304669382920080361114003201400531400411305493131137701003020010000300006020010000300001400411400531150201100991004010010000100000100100012010001001100000101003210011281113956540000999100002000040100140057140057140042140042140057
7020414005610491000000010000014004113943913934812936225801064010030006100003010030000100001264288669382920080361114003201400561400561305643131152701003020010000300006020010000300001400411400531150201100991004010010000100000100100012110002021100001111003210011281113955440000060100002000040100140042140054140054140054140057
7020414005310491000000020000014004313963813933212935925801064010030003100003010030000100001264288669382920080361114003201400561400531305643131149701003020010000300006020010000300001400561400531150201100991004010010000100000100100012110002111100001111003210011281113956840000006100002000040100140057140042140057140042140057
7020514004110491100010020000014007713959213934812936225801064010030006100003010030000100001264288669309120082149114001701400411400561305643131152701003020010000300006020010000300001400531400411150201100991004010010000100000100100032110001111100001101103210011281113956840000999100002000040100140057140057140057140057140057
702041400411049100100002000001400411396381393541293622580106401003000610000301003000010000126430466930912008214911400320140041140041130564313115270100302001000030000602001000030000140056140041115020110099100401001000010000010010001111000201184100001111103210011281113961240000990100002000040100140057140057140054140057140057
7020414004110491101000020000014004113963813933212935925801064010030006100003010030000100001264304669382920080361114003201400561400411305493131152701003020010000300006020010000300001400561400531150201100991004010010000100000100100033110001001100000111003210011281113956540000000100002000040100140057140057140057140057140042
702041400561049110000002000001400381396381393321293622580106401003000610000301003000010000126438166938292008036111400170140043140056130549313120270100340741090233836675301127632868142436142588271502011009910040100100001000011001003441100410032509100001111003868012722414129640146990100002000040100142300140042140055140057140170

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140052104900010010001830100014003513944613934312936025800104001030000100003001030000100001264719669338820079451014002431400471400351305650313117570010300201000030000600201000030000140035140047115002110910400101000010000110100001100000101000010000314091205413957240000966100002000040010140051140051140051140051140149
7002414005010490000000000100000140035139487139325129353151800104001030003100003001030000101981264771669279120081263014002301400351400471305680313117370010300201000030000600201000030000140035140035115002110910400101000010000010100001100000161000010000314071207413957240000099100002000040010140051140051140051140048140051
70024140050104900000001205980000114002013949113932512935625800104001030003100003001030000100001264771669279120079451014002601400501400471305680313117570010300201000030000600201000030000140035140053515002110910400101000010000010100001100000001000001000314051224613984440000999100002000040010140051140036140095140048140051
700241400501049000000000010100014002013949113933812935625800134001030003100003001030000100001275691669298320081697014001101400391404391305680313117570010300201021130000600201000030000140035140049115002110910400101000010000110100051100000001000001000314061217413957240000909100002000040010140048140036140048140048140048
700241400501048000000001010100014003213944613933812935625800134001030003100003001030000100001264719669338820081263014002601400351400471305680313117470010300201000030000600201000030000140050140047115002110910400101000010000010100001100000001000011000314051204313956940000066100002000040010140036140051140036140036140056
700241400501049000000000010000014003213948713934312935625800134001030003100003001030000100001264719669353520081697014002601400351400501305530313116170010300201000030000600201000030000140050140047115002110910400101000010000110100000100000001000010000314071207413955740000069100002000040010140051140051140036140036140051
7002414014510490000000000100000014003513948713933812935625800134001030003100003001030000100001264771669279120079451114002301400501400501305680313117570010300201000030000600201000030000140035140047115002110910400101000010000110100001100000031000000000314041204513957240000999100002000040010140036140051140048140048140051
700241400501049000000000090000014003713949113932512934125800134001030003100003001030000100001264748669353520081263014002301400501400501305680313117570010300201000030000600201000030000140050140047115002110910400101000010000110100001100000031000710000314051205313957240000690100002000040010140048140037140051140051140051
700241400501049000000000010000014004013949113933812935625800134001030003100003001030000100001264748669338820081697014001101400351400501305530313117570010300201000030190600201000030000140065140057115002110910400101000010000010100000100000001000000000314041214313956940000600100002000040010140048140048140036140053140052
7002414005010490000001000101000140020139446139325129353258001340010300001000030010300001000012647716692791200794510140026014003514005013056803131161700103002010000300006002010000300001400471400351150021109104001010000100001101000011000000010000010103140312045139557400009915100002000040010140048140036140095140051140038

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.8b, v1.8b }, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0260

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514070110520000102010001400211397701393261295652580100401003000310000301003000010000126431967036132011193014023601400361402601305323131777701003020010000300006020010000300001402601402601150201100991004010010000100001100100000110002000110000101000032104129431397784000010010100002000040100140037140261140261140264140261
702041402631050000000108810001402491397701395521301892580103401003000310000302533000010000126623267036132011193014023601402601402601313913131127701003020010000300006020010000300001402611402601150201100991004010010000100000100100000110000000010001101000032103129441397874000010100100002000040100140888140261140037140261140037
7020414003610500000001000001402451404391395551293425980149401003000310000301003000010000126624167036132011193014023901402601402601307807313198270100305161010630475605161010730000140260140263115020110099100401001000010000010010001211000318212328100001010000321041294413977540000101010100002000040100140037140037140261140037140262
702041408871049000110130100014024513940513955213018925801004010030003100003010030000100001272055670361320202004140863014026314068413076719131355707783034910266304706148610054303161402601402601150201100991004010010000100000100100000110000000310000001000032104129441403964000001010100002000040100140261140261140263140261140264
7020414026010500000000000001400211400401398021293422580100401003000310000301003059010000127205566969152011193014023601402601408871307673131355701003020010000300006020010000300001408871402601150201100991004010010000100000100100000110000000010000100000032104129441397724000013010100002000040100140261140261140261140888140037
7020414003610500000001000001402451397701395521293422580100401003000310000301003000010000126623267036132011193014023601400361402601307673131355701003020010000300006020010000300001408871400361150201100991004010010000100000100100000010000000010000101000032104129431397724000010100100002000040100140037140261140037140261140261
70204140887105100000010100014024513977013955212934225801034010030000100003010030000100001266232670361320202004140863014003614026013076731313557010030200100003000060200100003000014026014026011502011009910040100100001000011001000001100000000100001311010321041294414019140000660100002000040100140459140679140459140468140679
7020414067810541110001000000140666140216139967129984258010640100300031000030100300001000012701176723625201402951404340140678140458131188211317837010030200100003000060200100003000014067814067811502011009910040100100001000011001000220100020021100000111000321041293314018840000666100002000040100140679140679140459140679140459
70204140678105211000020010114066613986613996712998125801034010030006100003010030000100001270117671306920171978140654014068114067813118331317737010030200100003000060200100003000014067814067811502011009910040100100001000001001000221100030011100000101100321041294314018940019006100002000040100140459140679140679140459140459
70204140678105210010020010114066514026213996712998125801034010030006100003010030000100001270024672362520171978140658014067814067813118331317737010030200100003000060200100003000014067814045811502021009910040100100001000001001000210100020001100001101200321041294314018840000669100002000040100140679140679140679140679140682

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0260

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cf | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140260105100100000010000140245139488139552129565258001340010300031000030010300001000012647286703613200795951140236014026014026013077731313847001030020100003000060020100003000014003614026011500211091040010100001000011010000011000001001000011031400020122001761397814000010010100002000040010140037140261140261140261140261
70024140260105100000000010000140245139716139552129565368001340010300001000030010300001000012647286703613201119301140236014026014026013077731313847001030020100003000060020100003000014026014026011500211091040010100001000001010002011000000001000000031400071220051713978140000101010100002000040010140261140261140261140261140037
70024140260105100000000000100140245139716139552129342258001040010300031000030010300001000012666626692839201119301140236014026014026013140131313847001030020100003000060020100003000014026014026011500211091040010100001000001010000011000000001000011031400061220051713955840000101010100002000040010140261140261140261140037140261
70024140260104900100000010100140021139716139552129565258002740010300031000030010300001000012647286703613201119301140012014026014006913078131313847001030020100003000060020100003000014003614026011500211091040010100001000001010000011000001001000010031400051200017713978140000101010100002000040010140261140261140261140261140261
700241402601050000000000101001402451397161393261295652580013400103000010000300103000010000126666267036132007959511402360140260140260130777313138470010300201000030000600201000030000140260140260115002110910400101000010000010100000110000000010000110314000512200171713955840000101010100002000040010140261140261140261140261140261
700241402601050000000000101001400211397161395521295652580013400103000010000300103000010000126666267036132007959511402360140260140260130777313138470010300201000030000600201000030000140260140260115002110910400101000010000010100000110000000010000110314000612200111713978140000101010100002000040010140037140261140261140261140037
700241402601051000001100160100140245139716139552129565258001340010300031000030010300001000012666626692839201119301140236014003614026013055431313847001030020100003000060020100003000014026014026011500211091040010100001000001010000011000000001000010031400071220061713983440000101010100002000040010140219140037140304140261140261
70025140296105000000000010100140021139716139552129565258001340010300031000030010300001000012666626692839201119301140236014003614003613077731313847001030020100003000060020100003000014026014026011500211091040010100001000001010000011000000001000011031400017122007171395584000001010100002000040010140261140261140261140155140261
700241402601051000000000100100140245139747139561129565258001340010300031000030010300001000012666626703613201119301140236014026014026013077731313847001030020100003000060020100003000014003614026011500211091040010100001000001010000011000000001000010031400071220051713978140000101010100002000040010140261140037140261140261140261
700241402601050000000000000001402451397161393261295652580010400103000310000300103000010000126666267036132011193011402360140260140260130554313138470010300201000030000600201000030000140260140036115002110910400101000010000010100000110000000010000100314001181220017713978140000101010100002000040010140261140261140261140261140261

Test 4: throughput

Count: 8

Code:

  ld2r { v0.8b, v1.8b }, [x6]
  ld2r { v0.8b, v1.8b }, [x6]
  ld2r { v0.8b, v1.8b }, [x6]
  ld2r { v0.8b, v1.8b }, [x6]
  ld2r { v0.8b, v1.8b }, [x6]
  ld2r { v0.8b, v1.8b }, [x6]
  ld2r { v0.8b, v1.8b }, [x6]
  ld2r { v0.8b, v1.8b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5009

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2402054007030011101107910140040151806025240204100160054800001001600008000050041842354408241400510400544005599793100232401002008000016000020080000160000400704007011802011009910010080000800000100801491741800690113480052615301610511002162240052600800001600001004005640164400564005640071
24020440055300110011041101400551403060252402041001601048000010016000080000500415845544082414005104007040070997931002324010020080000160000200800001600004007040055118020110099100100800008000001008001718568005310073801786169441600511002162240067660800001600001004007140056400714007140071
2402044005530010000007810140055211831602524019610016010680000100160000800005004184234000034140051040070400709974310028240100200800001600002008000016000040070400651180201100991001008000080000010080018160800380027580052003701610511002162240052060800001600001004007140071400714005640071
24020440070300110011079102400551518306025240202100160096800001001600008000050041582754408221400510400554005599793100282401002008000016000020080000160000400704005511802011009910010080000800000100800181656800681011060800530169441610511002162240067060800001600001004005640056400564007140066
2402044005530011000007800140040141830002524019610016010480000100160000800005004158724000034140051040055400709973310013240100200800001600002008000016000040070400551180201100991001008000080000010080148180800380007580022615301600511002162240067000800001600001004007140071400714007140056
24020440065299101000078001400401400002524020410016004880000100160289800005004158425440822140051040070400559973310014240100200800001600002008000016000040056400701180201100991001008000080000010080017170800691015780052606801610511002162240067660800001600001004007140071400714005640056
2402044007030010001007900140055141806025240194100160104800001001600008000050041932840000321400510400704007099733100122401002008000016000020080000160000400704005511802011009910010080000800000100800181656800682004180051616801610511002162240052660800001600001004007140071400714020040056
240204400703001100010791004005514183000252402041001601048000010016000080000500415605544082414015004005540055997931001324010020080000160000200800001600004005540055118020110099100100800008000001008001716568006910042800226137441610511002163240052000800001600001004007140071400714007140056
240204400703001000110780014004021183060252401961001600968000010016000080000500418423400003614005104005540055997331001324010020080000160000200800001600004005540070118020110099100100800008000001008001818568003800042800226068441610511002162240067690800001600001004005640056400714007140056
24020440201300100001041000400391418010252401561001601048000010016000080000500418418544082214005104007040070997331001324010020080000160000200800001600004007040070118020110099100100800008000001008001616568006801072800530138411600511002162240067660800001600001004007140066400714007140071

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2400254005530000010030000400400111102524001010160054800001016000080000504185181845674040036040055400559996031003524001020800001600002080000160000400554005511800211091010800008000001080000030800240000800246124300050200017161513400526680000160000104005640056400564005640056
24002440055300010110200024005021801252400341016002480000101600008000050415287544008204003604005540055999603100352400102080000160000208000016000040055400551180021109101080000800000108000003080000000248002761164116050200013161212400449080000160000104004840066400664006640066
24002440065300100010620134005011818125240088101600828000010160000800005041528754400820400360400554005599960310035240010208000016000020800001600004005540056118002110910108000080000010800000080024010248002460164116150200013161713400449080000160000104004840048400484006640066
24002440065300100000630004005021818125240092101600828000010160000800005041582527200360400280400654006599960310027240010208000016000020800001600004006540065118002110910108000080000010800000080024000248002460164116250200015161313400629980000160000104006640066400484004840048
24002440065299100000300104004001118125240034101600828000010160000800005041861754401480400460400654006599960310035240010208000016000020800001600004005540047118002110910108000080000010800000308002400024800240116410050200012161212400386680000160000104005640056400424005640042
2400244005530001100030010400410111102524006410160054800001016000080000504186923920014040046040065400659996027100452400102080000160000208000016000040065400651180021109101080000800000108001617418005310257800386124300050200013161414400386080000160000104006640066400664004840048
240024400653001000003000040026000025240064101600008000010160000800005041874539200140400460400474006599960310045240010208000016000020800001600004006540065118002110910108000080000010800171842800160015780037612400050200016161016400629980000160000104005640056400564004240056
240024400553000110001901240032218181252400641016005480000101600008000050415825392001414004604006540047999603100272400102080000160000208000016000040065400651180021109101080000800000108001617418005210156800376124300050200016161511400526080000160000104005640056400564004240056
24002440041300011000620034005020181252400901016002480000101600008000050418658544008004003604005540055999603100272400102080000160000208000016000040047400651180021109101080000800000108001717418005310157800376124300050200017161515400526680000160000104005640056400564004240056
240024400553000111102001340050201812524009210160082800001016000080000504186175440082040036040056400559996031002124001020800001600002080000160000400554004111800211091010800008000001080000030800240009368002461534116250200010161014400620080000160000104006640066400664004940066