Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (multiple, 4H)

Test 1: uops

Code:

  ld2 { v0.4h, v1.4h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.006

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
63005294572181802611477146472880400171623004200410002000100050002385622274729069292633103000100020001000200029177290771161001100010001100021000100100021200128559273683131051068206963064381311525128650164921384115101100020002919129236292182917729382
6300429276220190240031504537287831017178300820061000200010005000238894224202909929312310300010002000100020002916729191116100110001000010002100000010013120012788957368983085753205183131381211535128592161491382115007100020002923129092292012922429183
6300429193218200220039214639287940017107300420061000200010005000238753226882904629345310300010002000100020002920929176116100110001000010002100000010012030013029930769093028756206133113381713504828605161711402014969100020002926029226292622927429316
630042921921921025002214552288240017139300620041000200010005000238880227012917229287310300010002000100020002915329125116100110001000010003100100010002120012847917168913211115220556305538149435128578160591395515087100020002927129239291872920329150
63004293382202001600214664288570017134300420041000200010005000238790227232919229261310300010002000100020002923729149116100110001000010002100000010002020012964915368483052104520595306438168485228534163061382314787100020002924529269292082927829288
6300429209219250230028114634288360017095300620061000200010005000238653227252907829232310300010002000100020002921629164116100110001000010002100000310002030013173926768273054106020607317938197585428350161131386914974100020002915829171292572926729299
63004293012202101900204964289550017170300620061000200010005000239022227772911229364310300010002000100020002908829217116100110001000010002100000010002020012905912268333084144620431312138149585528386160901374114970100020002931229224292802925229238
6300429229220220231131814558288080117185300420041000200010005000238743227372916529232310300010002000100020002916529158116100110001000010003100000010003020012922915569013056852205763069381613475528359162221392015011100020002934529273292972924829189
63004291922192002100387147202880300171843004200410002000100050002386802270429026292623103000100020001000200029158292371161001100010000100031000000100020304161304595046902311594920831324238159605428745162291398415070100020002963829270294332963329495
6300429224222200180039814556288170017109300620041000200010005000238775227122904429381310300010002000100020002928029043116100110001000010003100100010012130012942920668253047125120617313838165484828383164151359115017100020002926629209292552926629351

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.4h, v1.4h }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514005110490001100601001400361394041393441293572580103401003000310000301003000010000126435466935842008184311400201400511400511305593131147701003020010000301906020010000300001400511400511150201100991004010010000100000100100000010000000100001100321011281113956440000101010100002000040100140052140052140052140052140052
70204140051104900000001001001400361395611393441293572580100401003000010000301003016510000126435466935842008184311400861400511400511305593131147701003020010000300006020010000300001400511400511150201100991004010010000100000100100000110000000100000100321011281113956440000101010100002000040100140075140057140054140052140052
70204140051104900000001010014002013956113934412935725801034010030003100003010030000100001264354669358420081843114006614005114003513055931311267010030200100003000060200100003000014005114003511502011009910040100100001000001001000001100000001000010003210112811139564400000100100002000040100140052140052140052140052140052
702041400511049000000010100140036139404139344129341258010340100300031000030100300001000012643546693632200818431140058140051140051130559313114770100302001000030000602001000030000140036140051115020110099100401001000010000010010000011000000010000100032101128111395444000010010100002000040100140052140052140052140052140052
7020414003510490000000100001400361395611393441293572580100401003000010000301003000010000126431066927912008184311400321400511400351305313131147701003020010000300006020010000300001400511400511150201100991004010010000100000100100000010000000100001100321011281113954440000101010100002000040100140052140052140052140052140055
7020414005110490000100101001400361395611393481293602580103401003000010000301003000010000126435466935842008184311400301400511400351305593131147701003020010000300006020010000300001400351400511150201100991004010010000100000100100000110000000100001100321011281113956440000101310100002000040100140052140052140052140052140036
702041400511049000000010100140036139561139325129357258010340100300031000030100300001000012643546693584200818431140045140051140051130624313114770100302001000030000602001000030000140051140051115020110099100401001000010000010010000011000000010000110032101128111395444000001010100002000040100140052140036140052140052140052
7020414005110490100000100001400361395611393251293572580100401003000310000301003000010000126435466927912008184311400341400561400541305593131126701003020010000300006020010000300001400351400511150201100991004010010000100000100100000110000000100001100321011282113956440000101010100002000040100140036140036140036140036140036
7020414005110490000000001001400371394041393441293572580103401003000310000301003000010000126431066935842008184311400981400511400511305593131147701003020010000300006020010000300001400351400511150201100991004010010000100000100100000010000000100001100321011281113956440000101010100002000040100140052140052140036140052140055
70204140051104801000000010014002013956113934412935725801034010030000100003010030000100001264310669279120081843114009514003514005113055931311267010030200100003000060200100003000014005114005111502011009910040100100001000011001000001100000001000001003210117111395644000010100100002000040100140052140052140052140052140052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140051104901001001110011400471395641393461293602580013400103000310000300103000010000126481966937312008184311400330140054140051130553313117970010300201000030000600201000030000140054140051115002110910400101000010000110100000110000000100001000031421312115111395764000001013100002000040010140052140052140052140052140052
7002414003510490100100120003140045139614139332129365258001640015300031000030010300001000012647566694022200803611140033314006014005713057231311797001030020100003000060020100003000014005414003511500211091040010100001000001010000011000000010000101003142131219131395734000013100100002000040010140052140055140055140036140036
700241400351049010010010100214003913956413934412936025800134001030003100003001030000100001264819669373120081843014003601400411400571305783131185700103002010000300006002010000300001400601400571150021109104001010000100000101000231100030011000000100314414121121613957640000131010100002000040010140055140055140055140055140055
7002414005410490100100110001140020139564139346129357258001340010300031000030010300001000012648196693731200818431140027014005414005413057231311797001030020100003000060020100003000014003514005111500211091040010100001000001010000011000000010000110103142121201113139582400000100100002000040010140058140061140061140042140061
7002414006010491111100123000114004513961113935012936525800134001030003100003001030000100001264728669387820080361114003001400541400541305723131179700103002010000300006002010000300001400541400511150021109104001010000100000101000000100000001000010100314215121121213957740000131013100002000040010140055140055140036140055140055
700241400541049010010011100114003913956413934412936025800134001030000100003001030000100001264819669373120081843114003601400571400571305783131167700103002010000300006002010000300001400411400571150021109104001010000100000101000331100000001000011111314215121181013958340000131013100002000040010140061140042140058140061140115
700241400571049110111012000114004513961413935212936525800164001030003100003001030000100001264728669402220082745114003001400541400541305723131179700103002010000300006002010000300001400511400511150021109104001010000100000101000001100000001000010100314215121131313955740000131013100002000040010140055140055140055140055140112
70024140054104901001001100001140039139564139346129357258001340010300051000030010300001000012647836693731200818431140027014005114005113057231311797001030020100003000060020100003000014005414005111500211091040010100001000001010000011000000010000101003142131211291395764000013013100002000040010140055140055140055140055140055
70024140054104901001001200011400261394101393521293652580016400103000610000300103000010000126479466938782008274511400330140041140060130578313118570010300201000030000600201000030000140060140041115002110910400101000010000010100000010003600100041010231421312091213955740000969100002000040010140036140051140144140051140037
700241400501048010010115001114003813960813934512935925800134001030006100003001030000100001264717669387720082149014002601400471403421305683131203706853002010000300006002010000300001401491400471150021109104001010000100000101000000100000001000000100314215121101313957240000969100002000040010140051140051140051140036140051

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.4h, v1.4h }, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0458

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140285105201011110001000114044913940513955112956425801034010030003100003010030000100001266220670356420079595114044014025914025913053231313547010030200100003000060200100003000014025914046411502011009910040100100001000001001000001100000001000010100321261297713977140000969100002000040100140260140260140037140260140037
70204140036105201001000007000114002413940513955112956425801034010030003100003010030000100001264319670356420079595114023514046414003613076931313547010030200100003000060200100003000014004114046721502011009910040100100001000011001000001100002001000000100321281557713977140000969100002000040100140260140260140663140260140037
70204140259105206001011002000114044313986613996712976225801034010030006100003010030000100001270024672362520172426114043414075914067813118331317747010030200100003000060200100003000014068214068551502011009910040100100001000001001000321100011111000011110321251297713996940000099100002000040100140682140682140459140679140679
702041406811054111010000020001140666140262139972129984154801034010030006100003010030000100001270024672362520140295114065714068114067813118631318347010030200100003000060200100003063314045814067811502011009910040100100001000001001000111100060111000011010321261297514019140000900100002000040100140679140682140459140682140877
702041406781054110010010020002140666140262139970129762154801064010030006100003010030000100001268087672377220140295114023514003614046413076631311277010030200100003000060200100003000014046414046511502011009910040100100001000001001000001100000001000011010321291298614019140000990100002000040100140682140682140679140459140682
70204140678105411001000002000114044313986613997012998125801064010030006100003010030000100001268087671306920171978114043414045814068113118631317767010030200100003000060200100003000014045814045811502011009910040100100001000001001000310100010011000011011321471297813996940000999100002000040100140459140682140682140459140459
70204140458105411001001002000214066314026213974712998125801034010030009100003010030000100001268087671306920172714114065714068114068113118331317767010030200100003063460200100003000014068114045811502011009910040100100001000001001000211100060011000011110321281296714000340000999100002000040100140459140459140682140682140682
70204140681105311101011001000114075813978713997012976225801034010030006100003010030000100001270042671316520140295114065714068114068113118631317767010030200100003063960200100003000014067814045811502011009910040100100001000001001000111100010011000011111332271297714019140000000100002000040100140459140682140679140459140682
70204140681105413101011001435210214066614021613974712998125801064010030006100003066530000100001268087671306920171978114043414067814067813118631316697010030200100003000060200100003000014068114067811502011009910040100100001000001001000311100011211000011111321281299514019440021909100002000040100140682140459140682140682140682
70204140458105211101011001000214066614026213997012998425801064010030006100003010030000100001270024672377220172426114065514068114045813096431316997010030200100003000060200100003000014068114045811502011009910040100100001000001001000231100013011000001111321271298914019140000996100002000040100140662140459140682140682140679

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0678

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
700251405941054110010021011406631401731397471299812580016400103000610000300103000010000127054767236252017197801406541406781406781311930313180270010300201000030000600201000030000140296140496115002110910400101000010000110100032110002001136100001111200314021222214019740000660100002000040010140679140679140679140679140679
70024140680105310000001000140449139862139753129768258001340010300031000030010300001000012705476723625201719780140654140678140678131193031318027001030020100003000060020100003000014067814067811500211091040010100001000001010003211000200016563100001111000314021222214019740000606100002000040010140679140679140679140679140679
7002414071210541110100101011406631401731399671299812580016400103000310000300103000010000127054767236252017197801406541406781406781311930313180270010300201000030000600201000030000140678140678115002110910400101000010000110100032110002069137100001101000314021222214019740000606100002000040010140679140679140679140681140704
70024140458105310100002101140663140173139967129981258001640010300031000030010300001000012705476723625201719780140654140678140678131193031318027001030020100003000060020100003000014067814067811500211091040010100001000001010002211000105801100001111100314021222214019740000666100002000040010140679140679140679140679140681
7002414067810541100100200114066314017313996712998125800164001030006100003001030000100001270547672362520171978014065414067814067813119303131802700103002010000300006002010000300001406781406781150021109104001010000100000101000121100010201100001111000314021222214019840000666100002000040010140679140679140679140679140679
7002414068010541110100210114066314017313996712998125800164001030006100003001030000100001270547672362520171978014065514067814067813119303131802700103002010000300006002010000300001406781406781150021109104001010000100000101000121100010221100001101100314021222214019740000660100002000040010140679140679140679140679140684
70024140678105411111003100114066314017313996712998125800164001030006100003001030000100001270547672362520171978014065414067814067813119303131802700103002010000300006002010000300001406781406781150021109104001010000100000101000110100010211100001111100314021212214019740000066100002000040010140679140679140470140679140679
7002414067810541110100200014066314017313996713004625800164001030006100003001030000100001270547672362520171978014065414067814067813119303131802700103002010000300006002010000300001406781406781150021109104001010000100000101000121100010311100001111010314021222214019740000666100002000040010140679140679140679140679140679
700241406781053100000044310114066314017313996912998125800164001030006100003001030000100001270547672362520171978114065414067814067813119303131583700103002010000300006002010000300001406781406781150021109104001010000100000101000211100020011100001111100314021222214019740000666100002000040010140679140680140679140683140679
7002414067810541100100100114066314017313997313007225800164001030006100003001030000100001270547672362520171978014065414068114067813119303131678700103002010000300006002010000300001406781406781150021109104001010000100000101000121100010021100001111200314021222214019740000666100002000040010140679140679140679140679140679

Test 4: throughput

Count: 8

Code:

  ld2 { v0.4h, v1.4h }, [x6]
  ld2 { v0.4h, v1.4h }, [x6]
  ld2 { v0.4h, v1.4h }, [x6]
  ld2 { v0.4h, v1.4h }, [x6]
  ld2 { v0.4h, v1.4h }, [x6]
  ld2 { v0.4h, v1.4h }, [x6]
  ld2 { v0.4h, v1.4h }, [x6]
  ld2 { v0.4h, v1.4h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2402054007930010011411000400500108002524010010016008280000100160000800005004179754560014040040040064400649973031002224010020080000160000200800001600004006440041118020110099010010080000800001100800000370800000103580000610000051101161140154010800001600001004006540065400424006540065
2402044006430000000411001400490800025240100100160082800001001600008000050041842045600140400400400594006499780310017240100200800001600002008000016000040064400591180201100990100100800008000001008000003708000000035800350131410005110116114003800800001600001004007040070400704007040065
24020440041300000003700004004908000252401001001600828000010016000080000500417950456001414004504004140064997303100222401002008000016000020080000160000400644005911802011009901001008000080000010080000041080035044035088003160313700051101161140056014800001600001004006540042400424006540218
240204400643000000050100140049010800252401821001600708000010016000080000500419022184509404004504006440041997303100222401002008000016000020080000160000400644005911802011009901001008000080000010080000037080035032056800356004100051101161140061140800001600001004004240060400654004240044
2402044004130000000411001400490080025240190100160084800001001600008000050041921654407501400500400414006499780310027240100200800001600002008000016000040069400471180201100991100100800008000001008000000080000010628000000041000511011611400661313800001600001004007040070400704007040060
240204400643000000041000140044088002524018210016008280000100160000800005004189964560014040045040041400649973031001724010020080000160000200800001600004006440041118020110099010010080000800000100800000410800000103880031613141000511011611400611414800001600001004004240065400654004240065
240204400413000001041000140044088002524018410016008480000100160000800005004179504560014040045040064400649973031001724010020080000160000200800001600004004140059118020110099010010080000800000100800000370800310110800006131000051101161140061100800001600001004004240042400604006040044
2402044005930000000411001400260880025240100100160084800001001600008000050041900654399941400450400414006410014652310112240100200800001600002008000016000040064400411180201100990100100800008000001008000000080130049034800006004100051101161140061010800001600001004004240042400654006540070
2402044005930000011410001400490880025240170100160082800001001600008000050041842045600140400450400644006499730310022240100200800001600002008000016000040059400591180201100990100100800008000001008000003708003101103180035615845160051101161140061140800001600001004006540065400654006540065
24020440041300000004100014002600800252401841001600908000010016000080000500417950456001404002204006440041997303999924010020080000160000200800001600004004140041118020110099010010080000800000100800000370800310000800006104100051101161140056010800001600001004006540065400654006540060

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5009

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24002540070300111008710340055151831625240106101601008000010160000800005041596354408240400374007040070999631005024001020800001600002080000160000400704005411800211091010800008000001080016170080068112728005361694416105020216244006766080000160000104007140071400714007140167
24002440055300110017800240055220300252401041016009680000101600008000050419875544082404005140070400651000131005024001020800001600002080000160000400704007011800211091010800008000001080016160080069102738005261304416105020316344006766080000160000104007140056400714007140071
240024400703001110042001400402218012524011410160078800001016000080000504190075440082040036400704007010001310035240010208000016000020800001600004005540054218002110910108000080000010800161856080069001418005261684416005020316314006706080000160000104007140071400564007140056
240024400703001000078101400401518306252400661016009680000101600008000050416038544082204003640070400701000131005024001020800001600002080000160000400554007011800211091010800008000001080016195608006921273800526169016005020616444006760080000160000104068040211400804007140071
240024400563001110078000400551518306252400661016009680000101600008000050415963544082414005140070400709996310050240010208000016000020800001600004007040070118002110910108000080000010800161756080068201728005360694316005020316454005266080000160000104007140056400744007140056
240024400703001001077001400551503062524006410160056800001016000080000504160235440892040051400704007010001310050240010208000016000020800001600004005540055118002110910108000080000010800161956080068001708005260384416005020316534006766080000160000104007140056400714005640071
240024400553001010079100400401418300252400641016005880000101600008000050415963544082414005140070400701000131005024001020800001600002080000160000400704007011800211091010800008000001080016185608006810060800366154016005020316524006796080000160000104007140071400714007140056
2400244007030011100791014005515183062524006610160104800001016000080000504157144000032140036400704005510001310035240010208000016000020800001600004007040070118002110910108000080000010800161656080068101728005361684416105020416354006766080000160000104007140071400564007640071
24002440070300111007810140055141830625240066101601048000010160000800005041570454408221400514007040055100013100502400102080000160000208000016000040055400701180021109101080000800000108001619575800382540728005261694416105020216444005266080000160000104007140071400714007140071
24002440070300111007810140055140062524011410160104800001016000080000504163715440824040036400704007010001310050240010208000016000020800001600004005540055118002110910108000080000010800161856080068101738002201684416105020316314006766080000160000104007140056400714007140071