Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (multiple, 2S)

Test 1: uops

Code:

  ld2 { v0.2s, v1.2s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.008

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.008

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6300528299212212101008501052512782201016020300420061000200010005000238641222815282452836331030001000200010002000281522793111610011000100011003231004123100220300138141029772933535053195473401382715474627975140331236412979100020002802928199280432825428035
630042851221001212100550005262279330111601130082008100020001000500023884922776283032844631030001000200010002000280712807511610011000100001003321003002100022310141001049673323492046196243513382611494627798139551263812820100020002807328044280162800228417
630042802421101212100500050862798500115949300620061000200010005000238791622791282602853431030001000200010002000281002801311610011000100001002331005213100123210138641012971923329143194933407382119454227947140011221412811100020002835028202281702795028111
63004281152130111010030105056278840011599230062006100020001000500023918922775281362840031030001000200010002000282022815411610011000100001002121003021100022310134221008273113486046197953440382913434027809150541230512862100020002804128133280352824428307
630042810521201212100220105203280020001606830102006100020001000500023918122754283162851931030001000200010002000283332824111610011000100001002221004013100122310141431027472333556140198813205382315424227767145431235813968100020002850028445284522825628022
630042838721001114100290105206281460111607830082006100020001000500023882822796283602811531030001000200010002000278222828411610011000100001003221002013100222310138581014872053420048197573393382314443927994145061250813687100020002839528008282542805828110
630042843121201212110287010511927903000160543010201010002000100050002387832276828552282053103003100020001000200028418280941161001100010000100133100301210012231013922957871733373151196083370382514424527920140981241012774100020002846028401282782807228227
630042819721201212100501048092794701116083300620061000200010005000238621422769283312845531030001000200010002000285422823811610011000100001003231003023100222210137191001371333497048194443436382814414427985144801280712912100020002821728144282232810328196
63004281072130131210090104946279380001617030102008100020001000500023886112277028347282943103000100020001000200028083281611161001100010000100323100502310013221114140977574023550047195303447382217474927851140081235512691100020002820228226281932809728455
63004280862110130000050005346279230101623730062010100020001000500023874922755285012835131030001000200010002000279712793411610011000100001002321005003100222210133641003871753245047196773400382515484427874145011224812959100020002825828156282752808628158

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.2s, v1.2s }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4e | 4f | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514005810490000001010140020139608139346129360025801064010030006100003010030000100001266450669411820082745014003601400411400601305683131147701003020010000300006020010000300001400541400511150201100991004010010000100000100100000110003000110000110100321011281113957240000131013100002000040100140058140061140042140061140055
7020414003510490000001000140039139608139346129360025801034010030003100003010030000100001264354669373120079451014001101400541400541305593131150701003020010000300006020010000300001400351400511150201100991004010010000100000100100000010000001010000101000321011281113956640000131010100002000040100140100140055140055140055140052
7020414005410490000001010140039139404139346129360081801004010030003100003010030000100001264390669373120081843014001101400351400351305313131150701003020010000300006020010000300001400351400511150201100991004010010000100000100100000010000000610000001000321011331113954440000131013100002000040100140055140055140055140055140036
7020414003510490000000000140036139608139346129360025801004010030003100003010030000100001264310669358420081843014003001400541400541305623131150701003020010000300006020010000300001400351400351150201100991004010010000100000100100000110000000010000100000321011331113954440000131313100002000040100140052140052140055140036140036
702041400541049000000000014003913960813934612934102580103401003000010000301003016010000126439066937312008184301400300140054140054130568313112670100302001000030000602001000030000140051140035115020110099100401001000010000010010000011000000001000010100032101128111395664000001013100002000040100140055140055140052140055140052
702041400351049000000100014003913960813934412936002580103401003000010000301003000010000126439066937312008334001400300140054140051130531313112670100302001000030000602001000030000140054140051115020110099100401001000010000010010000011000000001000000000032101133111395444000001313100002000040100140036140055140055140055140036
7020414005410490000001010140039139608139346129341025801034010030003100003010030000100001264390669373120081843014001101400541400541305623131150701003020010000300006020010000300001400541400511150201100991004010010000100000100100000110000000010000100000321011281113954440000131313100002000040100140055140055140036140055140055
70204140051104810000022010140039139608139325129360057801034010030003100003010030000100001264390669373120081843014001101400511400511305623131150701003020010000300006020010000300001400541400351150201100991004010010000100000100100000110000000010000100000321011281113956640000131313100002000040100140055140063140036140036140036
7020414003510490001001001140248139608139346129374025801034010030003100003010030000100001264310669279120079451014003001403571403461305683131128701003020010000300006020010000300001400551400541150201100991004010010000100001100100000110000020010000101000321011281113956940000131313100002000040100140055140055140055140055140052
7020414005410490001001300014003913960813932512936002580100401003000310000301003000010000126433766937312008184301400300140054140054130531313115070100302001000030000602001000030000140054140038115020110099100401001000010000010010000001000001031000000100032101128111395664000013010100002000040100140052140055140055140052140052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0047

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | cd | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
700251400471049000010101400321394461393421293552580013400103000310000300103000010000126474866933882008126314002301400471400471305653131173700103002010000300006002010000300001400471400471150021109104001010000100000101000001100000010000110314000041202213956940000666100002000040010140048140048140098140048140048
7002414004710490000100014003213945113933812935325800134001030003100003001030000100001264748669338820081263140023014005014004713056531311737001030020100003000060020100003000014010814004711500211091040010100001000001010000011000068010000010314000021202213956940000666100002000040010140048140054140088140051140048
700241400471049000010101400321395311393701293552580013400103000310000300103000010000126474866933882008126314002301400471400471305923131173700103002010000300006002010000300001400471400471150021109104001010000100000101000001100000010000110314000031202213956940000666100002000040010140048140134140052140048140048
700241400471049000010101400321395431393501293532580013400103000310000300103000010000126474866933882008126314002301400351400471305653131173700103002010000300006002010000300001400471400471150021109104001010000100000101000001100000010000110314000021202213956940000666100002000040010140048140048140094140048140048
700241400471049000010101401211395081393381293532580013400103000310000300103015910000126474866933882008140714002301400471400471305653131173700103002010000300006002010000300001400471400471150021109104001010000100000101000001100000310000110314000021202213956940000666100002000040010140049140098140079140048140048
7002414004710490000101014003213948213933912935325800134001030003100003001030000100001264748669338820081263140023014004714004713056531311737001030020100003000060020100003000014004714004711500211091040010100001000001010000011000000100001103140000212022139569400006617100002000040010140048140099140049140051140049
700241400471048000010001400321394881393431293532580013400103000310000300103000010000126474866933882008126314002301400471400471305653131173700103002010000301886002010000300001400471400471150021109104001010000100000101000001100000010000110314000021202213956940000666100002000040010140048140105140062140049140048
700241400471049010010101400321395021393401293532580013400103000310000300103000010000126474866933882007945114002301400471400471305653131173700103002010000300006002010000300001400471400471150021109104001010000100000101000001100000010000110314000011202113956940000666100002000040010140048140048140121140051140048
700241400471049000060101400321394991394131293542580013400103000310000300103000010000126474866927912008126314002301400471400471305653131173700103002010000300006002010000300001400471400471150021109104001010000100000101000001100000010000110314000021202213956940000666100002000040010140048140054140110140087140048
700241400471049000010101400321394491393381293532580013400103000310000300103000010000126474866933882008126314001101400471400471305653131173700103002010000300006002010000300001400351400471150021109104001010000100001101000001100000010000110314000131202213956940000666100002000040010140048140048140102140059140048

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.2s, v1.2s }, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0685

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140881105410001210014044314027913974712968163801064010030006100003010030000100001269962672382120172574114066114068214068513119031317807010030200100003000060200100003000014045814068211502011009910040100100001000011001000221100020111000011110032101129111401954000001013100002000040100140686140459140686140686140686
702051404621054101012100140670140279139978129987258010640100300031000030100300001000012699626723965201725741140661140685140685131190313178070100302001000030000602001000030000140458140682115020110099100401001000010000110010002111000100110000111120321011291114019240000101013100002000040100140683140686140686140459140683
702041406821054110002000140670140279139974129987258010640100300061000030100300001000012680876713069201725741140661140458140685131190313178070100302001000030000602001000030000140685140685115020110099100401001000010000110010003211000100110000111100321011291114019540000131010100002000040100140683140683140686140686140683
702041406851054111002000140670140279139974129985258010640100300061000030100300001000012699466723965201725741140434140685140685131071313177770100302001000030000602001000030000140685140682115020110099100401001000010000110010002211000200110001110110321011291114019540000131013100002000040100140686140686140686140686140686
702041406851053100002000140670140276139974129985258011640100300061000030100300001000012699626723965201725741140661140458140685131190313177770100302001000030000602001000030000140685140458115020110099100401001000010000110010002111000101110000111100321011291114019540000101013100002000040100140686140683140459140686140686
70204140458105411100200014067014027613997412998725801064010030006100003010030000100001269962672396520140295114066114068514045813119031315537010030200100003000060200100003000014068514068211502011009910040100100001000011001000311100010111000011011032101129111399694000013100100002000040100140683140683140683140683140686
70204140685105310000100114067314027913997412998725801034010030006100003010030000100001269962672396520172574114066114068514068513118731317807010030200100003000060200100003000014068514068211502011009910040100100001000011001000231100010011000011112032101129111401954000013013100002000040100140683140683140459140686140686
70204140682105310100681001406671402791399741299852580103401003000610000301003000010000126996267130692017257411404341406851404641311903131780701003020010000300006020010000300001406851406821150201100991004010010000100001100100021110002011814110007111100321011291113996940000101313100002000040100140685140683140459140459140686
702041404581054110002000140670139866139974129987258010640100300061000030100300001000012699626723821201725741140434140685140685131187313178070100302001000030000602001000030000140685140682115020110099100401001000010000010010002301000101110000111102321011291113997040000131313100002000040100140687140459140686140459140459
702041406851053101002000140670140282139972129985258010340100300031000030100300001000012699466723965201725741140661140685140685130964313177770100302001000030000602001000030000140685140682115020110099100401001000010000110010003111000201110000111120321011291114019240000101313100002000040100140459140686140686140686140686

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0682

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7002514068210540110001002600001140443140238139747129987258001340010300061000430010300001000012704936723965201725740140434014068214068213119731315837001030020100003000060020100003000014068514068211500211090104001010000100001101000311100020111100001111100314217122121614020140000131013100002000040010140686140459140459140686140683
7002414045810531110200011900001140670139826139974129986258001640010300061000030010300001000012685786723965201725740140661014068514045813120031318017001030020100003000060020100003000014041914068211500211090104001010000100000101000310100060001100001111100314213122171214020440000101010100002000040010140664140684140683140683140686
700241406821054110010010201002140443139826139975129989258001340010300061000030010300001000012704936713069201725740140661014045814045813120031318097001030020100003000060020100003000014068214068211500211090104001010000100000101000321100010011100001111200316911122161514020140000131013100002000040010140686140686140459140686140686
70024140685105311001000020000114067014023813975012998725800134001030006100003001030000100001270493671306920172574014066101406821404581311973131812700103002010000300006002010000300001406851406821150021109010400101000010000010100012110001000110000110100031421612115121399784000013013100002000040010140683140686140459140686140683
700241406851052111000000201002140670139826139974129762258001640010300061000030010300001000012705476699180201725740140662014068214068213120031318067001030179100003000060020100003000014068514068211500211090104001010000100000101000330100030011010000111110031421312114181402044000013013100002000040010140459140459140460140687140595
700241404581054110010000101001140443140235139974129987258001640010300061000030010300001000012704656723965201734380140658314045814068513120031315837001030020100003000060020100003000014068214063111500211090104001010000100000101000221100020001100001111100314213122121614020440000101010100002000040010140683140459140686140684140686
70024140458105411001000020000114067014023813974712998725800164001030006100003001030000100001270493671306920173015014043401404581406851309743131583700103002010000300006002010000300001404531406861150021109010400101000010000010100012110002000110000111100031421412211201400944000013013100002000040010140683140683140683140683140686
70024140685105411102000020000214066713982613997412976225800134001030020100003001030000100001270493672396520173015014043401406851404581312003713150271022300201005330316600201010630159140504140710215002110901040010100001000001010002111000110371000311011003144141221410140201400000010100002000040010140385140459140686140634140683
700241406871054111011100201001140667139750139747129985258001640010300061000030010300001000012704936713069201725740140661014068214045813097420131812703493033610106300006033810106303161402011402834150021109010400101000010000110100012110001010410000111100131421612115181402014000013010100002000040010140459140686140686140426140459
700241406831053111000100176000011406701402411399741297272580016400103000610001301523029510000127047467238212017257401406610140685140685130974201318097001030178100003000060020100003000014068214045821500211090104001010000100000101000211100040001610000111130031421412210141399784000013013100002000040010140686140459140686140686140686

Test 4: throughput

Count: 8

Code:

  ld2 { v0.2s, v1.2s }, [x6]
  ld2 { v0.2s, v1.2s }, [x6]
  ld2 { v0.2s, v1.2s }, [x6]
  ld2 { v0.2s, v1.2s }, [x6]
  ld2 { v0.2s, v1.2s }, [x6]
  ld2 { v0.2s, v1.2s }, [x6]
  ld2 { v0.2s, v1.2s }, [x6]
  ld2 { v0.2s, v1.2s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24020540059300010041101400498825240100100160082800001001600008000050041840345600141400454006440041997303100172401002008000016000020080000160000400594005911802011009910010080000800001100800003780000008003560310051102162240061181010800001600001004006540065400654006040060
24020440059300000041001400288025240182100160000800001001600008000050041900418450940400454004140064997303100222401002008000016000020080000160000400414006411802011009910010080000800000100800003780035008003560314105110216224006101014800001600001004004240042400604006040042
240204400593000000410004004410825240182100160000800001001600008000050041900654399941400454004140064997303999924010020080000160000200800001600004006440059118020110099100100800008000001008000037800350358003501314105110216224003801414800001600001004006540065400654006540060
240204400413000000411004004908252401821001600828000010016000080000500418996456001404002240059400419973031002224010020080000160000200800001600004006440059118020110099100100800008000001008000037800000358003561354105110216224003801414800001600001004006540065400654004240042
24020440064300000000004004488252401001001600008000010016000080000500418996543999404004540064400649973031002224010020080000160000200800001600004005940059118020110099100100800008000001008000041800351408000001314105110216224004701414800001600001004005140060400654006540060
2402044006430000000001400448825240100100160082800001001600008000050041897345600140400454006440064997303100222401002008000016000020080000160000400414005911802011009910010080000800000100800003780000008003100354105110216224005601414800001600001004006540042400604006040065
2402044004130000004110040049882524018210016000080000100160000800005004184034560014040045400594006499730310022240100200800001600002008000016000040064400591180201100991001008000080000010080000378003103580035600005110216224006101410800001600001004004240065400424006540065
2402044006430000004100040049810252401821001600828000010916000080000500418420456001404002240064400419973031001724010020080000160000200800001600004006440059118020110099100100800008000011008000037800000358003561354105110216224006101410800001600001004006040042400654006540042
240204400593000000901004004410025240170100160082800001001601928000050041795018450940400454006440064997303100172401002008000016000020080000160000400644005911802011009910010080000800000100800003780031008003561314105110216224003801410800001600001004006540065400654006540042
240204400642990000410004004988252401001001600828000010016000080000500418996456001404004540064400649973039999240100200800001600002008000016000040120400591180201100991001008000080000010080000080035031800356131410511021622400610100800001600001004006540065400654006540065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
240025400553000010110030100040040011110025240064101600548000010160000800005041872418456741400360400554005599960310035240010208000016000020800001600004005540055118002110901010800008000001080000030800240002480024610300005020116111400520680000160000104005640056400604005640056
24002440059300000000000100040040011110025240064101600548000010160000800005041874539200140400360400554004199960310035240010208000016000020800001600004005540055118002110901010800008000001080000030800240002780000602437000502011651400526680000160000104004240056400564004240056
240024400552990000000030000040040011110025240064101600548000010160000800005041874539200141400360400554005599960310035240010208000016000020800001600004005940055118002110901010800008000001080000030800000002480024612430000502011681400526680000160000104005640056400564005640056
2400244005530000000000010004004001000025240010101600548000010160000800005041874539200141400360400554004199960310021240010208000016000020800001600004005540055118002110901010800008000001080000030800240002480024612430000502011672400526680000160000104005640056400564004240056
240024400553000000000030000040040011110025240064101600548000010160000800005041902139200141400360400554005599960310023240010208000016000020800001600004005540055118002210901010800008000011080000030800240002480024612437000502011611400526680000160000104004240060400564005640056
24002440055300000000003010004004001111002524006410160054800001016000080000504187453920014140036040055400559996031003524001020800001600002080000160000400564005511800211090101080000800000108000003080024000248002461240000502011611400526680000160000104004240056400564005640042
24002440055300000000003000004004401111002524006410160054800001016000080000504187453920014140036040041400559996031003524001020800001600002080000160000400554005511800211090101080000800000108000003080024000248002461037001502011611400386680000160000104005640056400604005640056
240024400553000000000030100040040011110025240064101600548000010160000800005041873639200141400360400554005599960310035240010208000016000020800001600004005540055118002110901010800008000001080000030800240003180024612430000502011611400526680000160000104004240056400424004240056
240024400413000000000001000400260010002524008010160054800001016000080000504187453920014140036040055400559996031002824001020800001600002080000160000400554004111800211090101080000800000108000003580024000248002461030000502011611400526680000160000104005640056400424005640056
24002440055300000000003000004004001311002524006410160054800001016000080000504186923920014140036040055400559996031004524001020800001600002080000160000400654006511800211090101080000800001108001817418005310257800376152411610504611611400526680000160000104005640056400564005640056