Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (8H)

Test 1: uops

Code:

  ld2r { v0.8h, v1.8h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.004

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
630052951922102300230110020004852287810011718230082000100020001000500023866222766290912925231030001000200010002000291632916311610011000100001000021000010310000030012991928568463229978207933072382615605428425163871374314958100020002941129289294522934829256
630042938622002300210110020004638288150001728730002004100020001000500023808222662291062927831030001000200010002000291212908811610011000100001000001000030010000000012828902668923199960205673050382721576228303164101410115045100020002929529227292112925729225
630042939221902100210000000004591287660001724630002004100020001000500023819022689291252922831030001000200010002000291942924811610011000100001000031000020310002020012857914168693104659207163061382825595628402163741398214723100020002927229268292972928529295
630042927621902300170000030004572287480001718830002006100020001000500023928022717291322920731030001000200010002000291982917911610011000100001000031000010310003120012870924568053017761205553070382722555928381166441403215072100020002930129333293092934729273
630042922322002300170000000104633287680101718030002004100020001000500023812322662290072930331030001000200010002000292232916111610011000100001000031000030310000120012990916168643032865206833070382417556128383164471416415174100020002913629252293122934329259
6300429333219026001800000401048342883200017252300020061000200010005000238640227052907629204310300010002000100020002923529187116100110001000010000310000280610003020012912909068353042953205693078382612475328344163561396615177100020002928929293293092935629208
6300429255219018001800000000045982871000017309300420041000200010005000238904227002904329301310300010002000100020002921829220116100110001000010000010000009310002020012838923568383185955207233054382313555728365166551379415112100020002924729212293362923629238
63004293752200170016001000000463028786000171283000200410002000100050002386842272629203293633103000100020001000200029191291231161001100010000100002100103406100020000130439249680831851259206353051382518564928376161761368315022100020002926829315292722932429271
6300429298220021002400000000046012874200017194300020041000200010005000238680227142905929299310300010002000100020002910129126116100110001000010000210000200100030200128919655686130671153206713072382220565728418161171406315041100020002936929330293072932329329
6300429411220025002000000000046092895600017226300020041000200010005000238880227512908129361310300010002000100020002931229163116100110001000010000210000240610003000012972905968373230757206493101382722605628393164121382915285100020002928129262292902932529271

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.8h, v1.8h }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0054

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514005110490000000001014003613960813932512934725801034010030003100003010030000100001264310669373120081843014003001400541400951305630313115070100302001000030000602001000030000140054140051115020110099100401001000010000010010000011000000100001101003210212823139566400000110100002000040100140052140036140036140055140036
702041400541049000000041080014003913960813932512936325801034010030003100003010030000100001264390669373120081843014001101400541400511305590313115070100302001000030000602001000030000140054140054115020110099100401001000010000010010000011000000100000020003210313333139566400000013100002000040100140055140055140036140036140055
70204140054104900000001000140020139561139325129435258010340100300031000030100300001000012643996693731200818430140011014005414005413053103131150701003020010000300006020010000300001400351400351150201100991004010010000100000100100000110000001000011000032103133331395664000001313100002000040100140036140055140055140036140036
70204140035104900000000000140020139404139325129363258010340106300031000030100300001000012643906693584200794510140027014005414005413056203131126701003020010000300006020010000300001400511400351150201100991004010010000100000100100000110000031000001000032103128331395664000013130100002000040100140036140113140055140036140055
702041400541049000000010001400391396081393441293631508010340100300031000030100300001000012643906692791200794510140030014005414005113056203131150701003020010000300006020010000300001400511401431150201100991004010010000100000100100000110000001000011000032473133331395444000013100100002000040100140055140055140036140052140036
702041400541049000000000101400391396081393441293652580103401003000010000301003000010000126439066937312008184311400350140054140051130562031311267010030200100003000060200100003000014005114005111502011009910040100100001000001001000001100001010000110000321031283313957240000101313100002000040100140055140052140036140052140055
70204140054104900000001000140020139608139346129363258010340100300001000030100300001000012643906693731200795950140586014005114005113053103131126701003020010000300006020010000300001400371400511150201100991004010010000100000100100030010000031000011400032373128331395644000001313100002000040100140036140057140055140055140057
7020414005410490000000100014002013940513935212941125801004010030006100003010030000100001264390669373120079451014003001400351400351305620313115070100302001000030000602001000030000140059140051115020110099100401001000010000010010000011000013100001000003210312833139810400430013100002000040100140273140474140036140162140255
702041400541049000000012000140041139608139344129366258010340100300031000030100300001000012643906693731200794511140011014010214005413053103131126701003020010000300006020010000300001400351400351150201100991004010010000100000100100000010000201000011000032107143331395644000013013100002000040100140055140055140036140055140036
7020414005410490000000000014003613956113932512936315380122401003000310000301003000010000126439066939742007945101400300140051140037130567031311507010030200100003000060200100003000014005414005111502011009910040100100001000011001000001100000010000110000321031285313956740000131313100002000040100140055140039140435140055140061

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0054

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 22 | 23 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140102104901000061101400391395641393461293572580010400103000910000302903000010000126481966937312008184301400300140054140035130572313116170010300201000030000600201000030000140054140035115002110910400101000010000010100001100000310000110314061212413964540000101011100002000040010140055140062140052140055140036
7002414003510490000001001400201395641393251293602580013400103000310000300103000010000126481966935842008184301400110140054140035130572313117670010300201000030000600201000030000140054140051115002110910400101000010000010100001100000010000100314041204213961440000131313100002000040010140036140036140052140055140036
7002414003510490000001101400391395641393251293602580013400103000010000301573000010000126481966958882008184301400300140054140054130572313116170010300201000030000600201000030000140054140051115002110910400101000010000010100000100000010000110314041214213957640000131013100002000040010140055140036140055140036140052
70024140054104900000010014003913956413934612936025800134001030003100003001030000100001264783669373120081843014003001400511400541305533131179700103002010000300006002010000300001400541400511150021109104001010000100000101000001000000100011103140412142139580400001000100002000040010140055140036140055140036140055
7002414005410490000000001400201395641393441293412580013400103000310000300103000010000126481966937312008230401400300140035140054130553313117970010300201000030000600201000030000140035140051115002110910400101000010000010100001100000010000010314041214213957340000131313100002000040010140055140055140036140055140055
7002414005410490001001101400201394871393461293602580013400103000010002300103000010000126481966937312008184301400110140054140054130572313116170010300201000030000600201000030000140054140051115002110910400101000010000010100001100000310000110314041214513957340000131310100002000040010140036140055140055140052140036
700241400351049000000110140039139487139346129360258001340010300031000030010300001000012648196692791200794510140030014005414003513057231311797001030020100003000060020100003000014005414005111500211091040010100001000001010000110000061000011131405121421395934000013013100002000040010140036140055140055140036140036
70024140054104900000000014003613956413932512936025800134001030003100003001030000100001264719669279120079451014003101400541400511305533131179700103002010000300006002010000300001400351400511150021109104001010000100000101000011000000100000003140412024139573400000013100002000040010140036140036140055140055140055
7002514005110490000001010140020139564139344129341258001040010300031000030010300001000012647196692791200823041140030014003514005413055331311797001030020100003000060020100003018914005414005111500211091040010100001000001010000110000001000000031402120241395734000013013100002000040010140036140055140036140055140055
700241400351049000000600140020139564139346129360258001340010300031000030010300001000012648196693731200818431140030314005414005413057231311767001030020100003000060020100003000014005414005111500211091040010100001000011010000110000001000011031404121441396274000010013100002000040010140055140055140055140036140055

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.8h, v1.8h }, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0685

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140496105311111002001140670140279139974129762258010340100300061000030100300001000012696136723821201725740140658014068514045813119031315537010030200100003000060200100003000014068514045811502011009910040100100001000011001000120100020001100001111100321021291113996940000131013100002000040100140459140686140686140683140459
70204140682105411000001000140670139866139754129626258010640100300031000030100300001000012680876723821201727180140661014045814068513119031317807010030200100003000060200100003000014068214068511502011009910040100100001000001001000120100010001100001111000321011291114019240000101013100002000040100140459140686140686140686140686
7020414068510541111000100014048814027913997212976225801064010030006100003010030000100001269962671306920172574114043401406821406821311873131553701003020010000300006020010000300001406821404581150201100991004010010000100000100100021110001001110000110100032101129111399694000013100100002000040100140686140686140459140459140686
7020414068210531110000110014044313986613997412998725801034010030006100003010030000100001269962672396520172574114043401406851406851311873131777701003020010000300006020010000300001406851406821150201100991004010010000100000100100021010002000110000111110032101129211401924000013100100002000040100140686140686140459140459140459
7020414068510541111000100014067014027913997412998725801064010030006100003010030000100001269946672382120140439114066101406821404581309643131780701003020010000300006020010000300001406851406821150201100991004010010000100000100100033110001002110000110100032101129111399694000001313100002000040100140683140686140686140723140459
7020414069010531110000110014067014027613997412998725801064010030006100003010030000100001269962672396520172574114066101403871406851311913131780701003020010000300006020010000300001406851404611150201100991004010010000100000100100011110001000110000110110032101129111401924000013010100002000040100140686140459140686140686140683
7020414045810541010000100014044313994213997412976225801064010030003100003010030000100001269962672396520172574114043401406851406851311893131777701003020010000300006020010000300001406851406821150201100991004010010000100000100100011010002000110002110100132101129411401954000013130100002000040100140689140459140686140683140683
70204140458105311100002101140670140276139972129987258010340100300061000030100300001000012680876723965201725740140434014068514045813119031315537010030200100003000060200100003000014045814068211502011009910040100100001000001001000120100010001100001111100321011291113996940000101310100002000040100140686140643140683140459140459
702041404581052111100010001406701402791399741299872580106401003000610000301003000010000126996267130692014029501406610140685140690131190313155370100302001000030000602001000030000140685140682115020110099100401001000010000010010002101000100211000001112003210112911139969400000100100002000040100140459140686140683140686140459
702041406851052111000011000140669140279139747129987258010640100300031000030100300001000012699626713069201737460140666014068514069013119031315537010030200100003000060200100003000014068514045811502011009910040100100001000001001000111100010001100001101000321011291114019240000131313100002000040100140686140686140459140686140686

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0679

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
700251404641052000000010100114066614023213997012998125800164001030006100003001030000100001270547672377220171978114065414067814068113119631318577001030020100003000060020100003000014045814045811500211091040010100001000011010001111000102110000111100314071222613997840000669100002000040010140459140459140679140682140682
700241404581053101010080000114044314017513974712976225800134001030003100003001030000100001268578672377220171978114065414045814067813097431315837001030020100003000060020100003000014045814067811500211091040010100001000011010001211000102110000110110314021222614020040000060100002000040010140682140682140459140682140682
7002414067810541110000100000140663140173139970129981258001640018300061000030010300001000012704546723772201724261140654140681140458131196313186270010301791000030000600201000030000140681140678115002110910400101000010000010100034110003012510000111110314021212614020040000990100002000040010140682140459140682140459140459
700241406811053100000020100014066614017313974712974125800164001030006100003001030000100001268578672377220140295014043414046114068113097431318597001030020100003000060020100003000014068114067811500211091040010100001000001010002201000100110000111110314021212214020040000669100002000040010140682140687140459140682140682
700241406841053100000020100014066314017313996712998425800164001030006100003001030000100001270454671306920140295114043414068114045813119631318867001030020100003000060020100003000014068114067811500211091040010100001000001010002211000600110000011120314021222213997840000609100002000040010140682140686140682140459140679
700241404581054110000020100014066914017613996712998425800134001030003100003015230000100001270454672377220171978114065714068114068113097431318027001030020100003000060332100003000014067814068111500211091040010100001000001010002211000342110000011120314021212214020040000990100002000040010140682140459140679140459140459
7002414068110521120000101000140443139826139970129762258001640010300061000030010300001005012705476723772201719781140657140678140681131196191318027001030020100533000060020100003000014068114067811500211091040010100001000001010002111000233110000010160314021222614020140000909100002000040010140682140744140683140682140682
7002414068110521100000130000014044314023213974712998425800134001030003100003001030148100001270454672377220172426014065414045814045813119831318587001030179100003000060020100003000014068114067811500211091040010100001000001010001111000210110000111100314061222214020040000690100002000040010140682140460140683140529140459
700241406811054101100070100114066614017313997012998425800164001030003100003001030000100001270454672377220171978114043414045814045813119631318727001030020100003000060020100003000014068114045811500211091040010100001000001010002211000200110000110110314021212214020040008969100002000040010140682140679140682140682140687
700241404581054111000020100014066714023213997012998425800134001030006100003001030000100001270454672391620176621014043414067814045813119631318027001030020100003000060020100003000014068114067811500211091040010100001000001010001111000201110001011110314021222614020040000699100002000040010140679140679140679140459140682

Test 4: throughput

Count: 8

Code:

  ld2r { v0.8h, v1.8h }, [x6]
  ld2r { v0.8h, v1.8h }, [x6]
  ld2r { v0.8h, v1.8h }, [x6]
  ld2r { v0.8h, v1.8h }, [x6]
  ld2r { v0.8h, v1.8h }, [x6]
  ld2r { v0.8h, v1.8h }, [x6]
  ld2r { v0.8h, v1.8h }, [x6]
  ld2r { v0.8h, v1.8h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5008

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24020540073300011003580101400490825240182100160082800001001600008000050041898545600141400404004140059997303100172401002008000016000020080000160000400594005911802011009910010080000800001100800003708000000080035613141051101161140061140800001600001004006040065400654006540060
24020440064299001100000140049082524018210016007080000100160000800005004189965439994140022400644006499730310017240100200800001600002008000016000040041400591180201100991001008000080000010080000370800310035800006131370511011611400381010800001600001004006540042400654006540042
2402044005930000000130010140044108252401821001600828000010016000080000500417950543999414004540041400649973031001724010020080000160000200800001600004006440041118020110099100100800008000001008000037080035003580035003500511011611400611414800001600001004006540065400424006040065
24020440064300000002060100400498025240182100160082800001001600008000050041899618450941400224004140041997331310022240100200800001600002008000016000040041400591180201100991001008000080000010080000370800000035800350135410511011611400611414800001600001004004240042400604004240065
24020440059300000001170100400261082524010010016008280000100160000800005004189735439994140045400594005999730310001240100200800001600002008000016000040064400411180201100991001008000080000010080000370800350035800006135370511011611400611414800001600001004006540042400654006540065
240204400413000000021201014004981025240100100160070800001001600008000050041899618450941400224006440059997403100172401002008000016000020080000160000400644005911802011009910010080000800000100800003708000001080035003141051101161140038100800001600001004006540065400604006540042
240204400413001001016300014004980252401821001600828000010016000080000500417950184509404004040059400419973031000124010020080000160000200800001600004006440059118020110099100100800008000011008000041080035000800356035410511011611400381414800001600001004006540060400424006540042
24020440064300000001610101400498825240182100160082800001001600008000050041900618450940400454006440041997303100222401002008000016000020080000160000400434005911802011009910010080000800000100800003708003500080035613541051101161140056140800001600001004004240065400654004240065
240204400413000100017601014004988252401821001600828000010016000080000500418996543999404002240064400649973031001724010020080000160000200800001600004004140041118020110099100100800008000001008000037080035003580031610410511011611400561014800001600001004006540065400654006540042
240204400413000000037000140049802524017010016000080000100160000800005004189964560014040045400414005999730310006240100200800001600002008000016000040064400591180202100991001008000080000010080000370800000035800316104105110116114006100800001600001004006040042400424006540065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24002540059300000000457000140054300725240094101600248000010160000800005041993154407500400500400474006910001310039240010208000016000020800001600004005940059118002110901010800008000001080000037800310000800356104100502011161716400440130080000160000104007040048400484007040070
2400244006930011011144401014004400002524009210160000800001016000080000504190154560014140045040064400649996310049240010208000016000020800001600004006940047118002110911010800008000001080017180800572222080000601645161502015161512400380140080000160000104006540042400654006540060
240024400593000000001430101400440080252400921016007080000101600008000050419586184567404002204004140041999631004424001020800001600002080000160000400644005911800211090101080000800000108000003780035000358003101310005020161614144005601414080000160000104006540060400424006040065
2400244004130000000017001014004408802524001010160082800001016000080000504190095439994040045040064400649996310039240010208000016000020800001600004004140041118002110901010800008000001080000008000000008003561314100502013161015400660130280000160000104004840048400704004840070
24002440069300110100410000400490880252400101016000080000101600008000050419586543999404002204006440064999622101292400102080000160000208000016000040064400421180021109010108000080000010800000378000000008003560041005020151616134006101014080000160000104004240060400654006540065
24002440059300000000410101400490880252400921016008280000101600008000050418518543999404002204004140041999631003924001020800001600002080000160000400644004111800211090101080000800000108000000800000000800356135000502010161713400660130280000160000104007040048400484004840070
2400244006930011110071010140049010802524001010160082800001016000080000504197214560014040045040064400649996310049240010208000016000020800001600004004740069118002110911010800008000001080017160800571002080041605745162502010161113400610140080000160000104006540042400604006540042
2400244005930000000036401024005439952524003410160084800001016000080000504197695440750040028040069400479996310044240010208000016000020800001600004005940059118002110901010800008000001080000037800350003580000013541005020161612164004401315280000160000104004840070400704005140070
2400244004730011110015101004002608802524009210160070800001016000080000504195861975048040022040064400419996310039240010208000016000020800001600004004140041118002110901010800008000001080000037800000000800356104100502012161515400610014080000160000104004240042400604006540060
24002440041300000000331760014004900802524009210160000800001016000080000504195864560014040045040041400649996310049240010208000016000020800001600004004740047118002110901010800008000011080017180800581006180041615845162502012161612400610014080000160000104006040042400654006540060