Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (single, S)

Test 1: uops

Code:

  ld2 { v0.s, v1.s }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.002

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.002

retire uop (01)cycle (02)03l1i tlb fill (04)mmu table walk instruction (07)l2 tlb miss instruction (0a)0e0f1e223a3f43464951schedule uop (52)schedule simd uop (54)schedule ldst uop (55)dispatch simd uop (57)dispatch ldst uop (58)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f696d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map simd uop (7e)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)9dl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)c2cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0eaebec? ldst retires (ed)? simd retires (ee)f5f6f7f8fd
630052949922145511504489288780001738230022002100020001000500023858122834292152944131030001000200010004000293212930511610011000100011000310010001000202012784936468513053060207223099382833525328346165461424215313100020002941229439295122944029463
6300429489220243003014526289000001732330022002100020001000500023893622718291272942131030001000200010004000292712932711610011000100011000210000001000201012858918668973049153207953036382529474628470166521418615224100020002948929407294632935329460
630042946422044300214738287850001731230022002100020001000500023858422745292042940331030001000200010004000292742924611610011000100001000210000001000212012887922969953093046208023114382627525028454166261432615188100020002938929373294302940129500
6300429426220424002114601289240001745430022002100020001000500023902522775290802944131030001000200010004000292942919711610011000100011000210000001000202013058915668573037348208633100382626535128434166441439115314100020002942729437295552944529465
630042943122022300214519288800001734330022002100020001000500023852022746291342946131030001000200010004000291942932811610011000100001000210000001000202012807917368423064149209003083382827524828453164581442415158100020002942529368294832945929303
630042948822113400214554287880101728930022002100020001000500023871522752292212935431030001000200010004000292862930011610011000100001000210000001000210012782909168313029143206473052382525495428314165031427315471100020002945129502294442939229409
630042945822124200214586289900001739330022002100020001000500023865322756291962942431030001000200010004000292962934711610011000100011000310000001000202012948914568363110247207273108382230494828432166741425815199100020002949329338295052942329482
630042942922023500314663289080001738230022002100020001000500023863422782292012960631030001000200010004000293162931511610011000100011000210000001000203012924921968313057148209133070382629504828459164671415515207100020002950129369295292954629520
630042945522132200414521288751001730830022004100020001000500023856222770292312946731030001000200010004000293072921011610011000100001000210000001000202012859914468483085350207873118382525534828432166971412515418100020002938329506294482934429408
630042944422123411314558289080001737730022002100020001000500023892422750291932940731030001000200010004000293202929711610011000100001000110000001000202012808917268283130452207843079382626515228459165301420615253100020002943729508294132941129417

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.s, v1.s }[1], [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01)cycle (02)03090e0f18191e1f22233f4d4e4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
70205140054104900100101014003613940413934612934125801034010030003100003010030000100001264390669373120079451114001114005114005113056231311267010030200100003000060200100005000014003714005111502011009910040100100001000011001000000100000001000010100032102128111395664000001013100002000040100140055140052140036140052140055
702041400511048000001000140020139404139346129360258010040100300031000030100300001000012643906693731200818431140030140051140035130531313115070100302001000030000602001000050000140051140051115020110099100401001000010000010010000011000000010000101000321011281113954440000000100002000040100140055140036140055140036140055
702041400541049000006790001400201394041393461293412580103401003000010000301003000010000126439066937312008184311400271400351400541305313131126701003020010000300006020010000500001400541400511150201100991004010010000100000100100000010000000100001000003210112811139566400001300100002000040100140036140036140055140055140055
702041400541049000000000140020139608139346129357588010340100300031000030100300001000012643106692791200818431140015140051140054130562313112670100302001000030000602001000050266140140140051115020110099100401001000010000010010000001000000010000101000321011281113956640000131013100002000040100140055140036140055140055140036
7020414005410490000000101400391396081393441293602580103401003000010000301003000010000126439066927912008184311400111400541400511305623131126701003020010000300006020010000500001400351400511150201100991004010010000100000100100000010000100100001010003210113311139544400001300100002000040100140055140036140055140052140057
70204140038104900000101014002013940413934412936025801034010030003100003010030000100001264390669373120081843114003014005414005413053131311507010030200100003000060200100005000014005114003511502011009910040100100001000001001000001100000001000010000032101133111395444000010013100002000040100140052140055140055140055140036
70204140054104901000100114003613940413932512936025801034010030003100003010030000100001264390669279120081843114002714005414003513053131311507010030200100003000060200100005000014003514003511502011009910040100100001000001001000001100000001000011100032101133111395644000001310100002000040100140036140055140055140036140055
70204140054104800000640001140020139608139346129360258010340100300031000030100300001000012643546693731200794511140030140035140054130531313115070100302001000030000602001000050000140054140051115020110099100401001000010000010010000011000000010000101100321011281113956640000131313100002000040100140036140036140036140055140036
70204140054104900000000014003913960813934912936025801034010030003100003010030000100001264390669373120079451114003114005114005413056231311507010030200100003000060200100545000014005414005111502011009910040100100001000001001000000100000001000010100032101128111395664000001310100002000040100140036140036140055140055140036
70204140038104900000000014003913960813934612936046801034010030003100003010030000100001264310669373120081843114002714005414005413053131311267010030200100003000060200100005000014005114005111502011009910040100100001000011001000001100000001000010000032101128111395664000013130100002000040100140055140055140036140052140036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0047

retire uop (01)cycle (02)030e0f191e1f22243f4d4e4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5l1d cache miss ld nonspec (bf)branch cond mispred nonspec (c5)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
7002514005310491100010140032139446139338129356258001340010300031000030010300001000012647486693915200794510114001101400471400501305653131173700103002010000300006002010000500001400471400471150021109104001010000100000101000011000000010000100031401012071113956940000069100002000040010140051140051140051140051140051
70024140035104911000101400201394911393431293562580013400103000310000300103000010000126477166927912008126301140026014003514005013056831311757001030020100003000060020100005000014004714004711500211091040010100001000001010000010000000100001100314010121101413957240000666100002000040010140051140051140036140051140051
70024140035104900010101401291394911393431293562580013400103000310006302923000010000126477166928872008126301140028014005014003513055331311757001030020100003000060020100005000014004714003511500211091040010100001000001010000110000000100001100314012121121213956940000060100002000040010140051140036140051140051140048
7002414004710491100010140035139487139338129353258001340010300001000030010300001000012647716692791200812630114001101400351400501305683131161700103002010000300006002010000500001400351400471150021109104001010000100000101000011000000310000110031401012111813955740000999100002000040010140036140051140051140048140051
7002414003510491001000140035139487139338129356258001340010300031000030010300001005512650596692791200812630114002601400471400501305653131175700103002010000300006002010000502651402741400471150021109104001010000100000101000011000000010000110031401012011813956940000996100002000040010140051140048140048140048140051
7002414004710490001000140020139491139325129356258001040010300001000030010300001000012647196693535200816970114002601400351400501305533131173700103002010000300006002010000500001400501400471150022109104001010000100000101000011000000010000110031401312014913957240000906100002000040010140048140048140048140048140048
700241400471049000510101400351394911393381293412580010400103000610000300103000010000126471966935352007945101140026014003514005013055331311617001030020100003000060020100005000014003514004711500211091040010100001000001010000010000103100001000314011120101413956940000906100002000040010140051140051140036140051140048
70024140050104900010001400321394461393381293562580013400103000610000300103000010000126477166935352008126301140026014011914006813056531311737001030020100003000060020100005000014005114004711500211091040010100001000001010000110000001210000010031401112112913956940000966100002000040010140051140051140048140051140036
70024140047104900010101400351394911393431293412580013400193000610000300103000010000126474866933882007945101140026014005014003513056831313237001030020100003000060020100005000014004714003511500211091040010100001000001010000110000100100001100314010121121013957240000909100002000040010140051140051140051140036140036
700241400351049110101014003513949113934312935625800104001030006100003057030000100001264780669353520081263011400260140035140035130554313117570010300201000030000600201000050000140050140047115002110910400101000010000010100001100001001000001003140812013913955740000909100002000040010140051140051140036140051140048

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.s, v1.s }[1], [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0887

retire uop (01)cycle (02)030e0f1e22233f4d4e4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)int prf full (71)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)acafb5bbl1d cache miss ld nonspec (bf)c2branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
702051402601050001010140457139770140175129342258011640145300031000030100300001000012643196733631201119301140012014088714088713139103131355701003020010000300006020010000500001408871400361150201100991004010010000100000100100001100000010000100003210112911139545400001300100002000040100140037140367140888140888140888
70204140887105000130014024514043913932613018925801034010030003100003010030000100001264319670361320079595114001201408871408871313910313198270100302001000030000602001000050000140891140260115020110099100401001000010000010010000110000001000010100321011291114039640000131313100002000040100140685140037140888140037140888
7020414088710491000014087714043914017513018925801034010030003100003010030000100001272055669283920111930014086301400361400361313910313112770100302001000030000602001000050000140036140036115020110099100401001000010000010010000110000001000010000321011291113954540000131013100002000040100140517140037140888140888140888
7020414003610480020014002313977013955212934225801034010030003100003010030000100001272055669283920079595114023601402601400361313910313198270100302001000030000602001000050000140887140260115020110099100401001000010000110010000110000001000000100321011291114039640000101313100002000040100140888140890140888140888140037
702041408871051001101409071401761392951293422580103401003000310000301003000010000127205567336312007959501408630140887140887131391031319827010030200100003000060200100005000014026014026011502011009910040100100001000001001000011000000100001100032101129111403964000013013100002000040100140037140888140889140037140888
702041402601051000001406621404391401751301892580103401003000310000301003000010000127205567336312020200411408630140473140296130532031319827010030200100003000060200100645000014088714026011502011009910040100100001000011001000001000000100000010032101129111397724000010013100002000040100140888140888140888140888140888
7020414003610490011014002214043913932613018925801034010030003100003010030000100001266786669283920111930114023601408871408871305320313198270100302001000030000602001000050000140887140260115020110099100401001000010000010010000110000091000010100321001331114039640000101010100002000040100140888140888140888140888140888
7020414026010510061014087213940514017513018925801034010030000100003010030000100001272055672391720111930114086301400361400361313910313198270100302001000030000602001000050000140684140260115020110099100401001000010000010010000110000031000010100321011291114039640000101013100002000040100140888140037140261140037140037
7020414003610510092500140873140439140175130189258010340100300031000030100300001005612721276733823200795951140236014088714003613053203131982701003020010000300006020010000500001408871400362150201100991004010010000100001100100001100003310000101003210112911140396400001000100002000040100140037140261140888140037140264
7020414088710500001014087213940513932612934225801034010030003100003010030000100001264319673363120079595014086301408871408871313910313198270100302001000030000602001000050000140887140036115020110099100401001000010000110010000110000690100001012032101133111395454000013010100002000040100140888140888140261140531140888

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0458

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f1e22233a3f4d4e4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f60696d6emap stall dispatch (70)int prf full (71)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch mispred nonspec (cb)cfd2l1i cache miss demand (d3)d5map dispatch bubble (d6)dadbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
700251402601051001010110014002813971614017513018925800134001030003100003001030000100001272485673363120111930011402361402601402601307770313138470010300201000030000600201000050000140887140260115002210910400101000010000010100000110000100100001000003176102122001113978140000101010100002000040010140888140037140261140261140888
7002414003610510000006000140547139716139326130189126800104001730000100003001030000100001266662669283920111930011402361402601400361314010313201170010300201000030000600201000050000140887140260115002110910400101000010000110100000010000100100001010003140001122001114040540000101010100002000040010140263140261140888140097140526
700241406161049000010100014024514039414017513018925800134001030003100003001030000100551272485669283920111930011400441408871408871314017313186870010300201000030000600201000050000140036140260215002110910400101000010000110100000010000003100001010003140001122001114040540000132713100002000040010140888140888140888140888140037
70024140268105100001010001408721394881393261293422580013400103000310000300103000010000127248566975822011193001140863140260140887131401031320117001030020100003000060020100005000014088714026011500211091040010100001000001010000001000000010000001000314000112200111404054000010010100002000040010140261140261140888140261140266
70024140260105000000000001408721397161401751293422580013400103000310000300103000010000126472867036132020200401140236140260140260130554031311627001030020100003000060020100005000014088714026011500211091040010100001000001010000001000000010000101000314000112200111398174000013130100002000040010140888140037140888140888140888
700241402601049000000110014087214039414017512934225800134001030003100003001030000100001272485673363120202004011408631408871402601307780313201170010300201000030000600201000050000140887140036115002210910400101000010000010100000110000000100001011103140001121012214020440000101310100002000040010140888140888140888140037140888
70024140036105100010060001400211397161393261301892580010400103000310002300103000010000127248567336312007959500140863140887140887131408031311627001030020100003000060020100005000014088714003611500211091040010100001000001010001111000000010000101000314000212200111395584000010130100002000040010140888140037140037140888140888
70024140039105000000060001408721403941393261301892580013400103000010000300103000010000126472867336312020200401140236140036140260131401031320117001030020100003000060020100005000014088714026011500211091040010100001000001010001101000000310000100000314000112000221404054000013010100002000040010140888140261140888140888140686
70024140887105100000011001400211403941393261301892580013400103000310000300103000010000126472867336312007959501140863140260140260131401031311627001030020100003000060020100005000014088714026011500211091040010100001000001010000011000000010000100000314000112200111404054000013100100002000040010140888140037140888140888140888
70024140036105100000011001408721403941395531301892580013400103000310000300103000010000127248567336312020200400140863140887140887131401031320117001030020100003000060020100005000014068514068211500211091040010100001000011010003211000100310000100000314000112000111404054000013130100002000040010140261140261140261140037140888

Test 4: throughput

Count: 8

Code:

  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.6262

retire uop (01)cycle (02)03mmu table walk data (08)0e0f191e1f2223373a3f4346494e51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)rob full (74)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst simd alu (9a)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)acafb5b6bbl1d cache miss ld nonspec (bf)cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
400205501293760110001001501000101031262401001001603118000010016000080000500400126192594215008905006750114003542401002008000016000020080000320000501335010811240201100991001008000016000080000010080000080000008001961029151101116814500640000800003200001005006850068501035006850243
4002045006737500103971081027605011820105026240352100160000800001001600008000050040000019169670500480500675006703639024010020080000160000200800003200005011550100112402011009910010080000160000800000100800002380026025800266002915110716975012010106800003200001005006850134500685013450068
40020450067375000000001501500982101002624034910016025280000100160000800005004000001916967050048050067501110031062401002008000016000020080000320000500675012111240201100991001008000016000080000010080000080019025800266019231511091688501360066800003200001005012350109500995006850068
4002045006737500002601039515005221000262403711001600008000010016000080000500400126191696705004805006750067003902401002008000016000020080000320000501175009311240201100991001008000016000080000010080000080019025800000119231511071679500641066800003200001005014550068501235010050068
400204500673750000320001580500520100026240100100160412800001001600008000050040006719259420500940501335011000354240100200800001600002008000032000050067501361124020110099100100800001600008000001008000023800000080026601901511010168115006400106800003200001005006850068501155011550068
4002045010237600000010005009900002624024010016015780000100160000800005004001351920159050048050099500960773242401002008000016000020080000320000500675014611240201100991001008000016000080000010080000080072026800250025291511011169115009000100800003200001005009750110501095011050068
40020450067375000025010005011820100262405121001604128000010016000080000500400126192594205004805010550156029324240100200800001600002008000032000050107500731124020110099100100800001600008000001008000008002501980025611929151101316975006400100800003200001005012950068501585016750103
40020450067375000000100150127010100262403821001602178000010016000080000500400000192671305007805009650096023324240100200800001600002008000032000050067500931124020110099100100800001600008000001008000029800190080026610291511081610115006400106800003200001005008250068500685010050100
40020450067375010032010005005221010132624038210016041280000100160000800005004001271925232050114050067501170038024010020080000160000200800003200005010750100112402011009910010080000160000800000100800000800260080000011901511091679501300000800003200001005011450134500685006850068
4002045009737500103201010205005801010426240411100160292800001001600008000050040012619276650501230501105008105532424010020080000160000200800003200005012850067112402011009910010080000160000800000100800002380026008002600001511071697500701000800003200001005009750115501155006850068

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.6260

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)l2 tlb miss data (0b)0e0f1e222324373a3f4346494e51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f6061696d6emap stall dispatch (70)rob full (74)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst simd alu (9a)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch mispred nonspec (cb)cfd0d2l1i cache miss demand (d3)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
400025501523760000008000001501732900262405811016000080000101600008000050400192191124921105008750085500801683232400102080000160000208000032000050092500941124002110910108000016000080000010800000298000000080025612529000150272294331784422225010900711080000320000105008550045501135014650146
4000245018837600000016210046015006929002624080310160000800001016000080000504001921911249211050126500445010100377240010208000016000020800003200005017350079112400211091010800001600008000011080000008002500318003201263500015027251042017104423235008600711080000320000105008150097501465009950045
4000245004437600000056100175050029290172624054210160157800001016000080000504002041911249211050025500845004414037524001020800001600002080000320000500965008011240021109101080000160000800000108000002980031002680000012535000150292511624171055202050093001151480000320000105014650097500815004550090
40002450044375000000500000175050029091002624021310160247800001016000080000504001931911249211050025501015009400367240010208000016000020800003200005007350044112400211091010800001600008000001080000008003200080031613200001502922104211786423155008610901480000320000105009250085500455007850097
400024500803750000006151002150500652092226240010101603488000010160000800005040012619213072110501265018850080010336324001020800001600002080000320000500845008011240021109101080000160000800000108000002980031003280031013135000150272294201784420205012210751080000320000105004550085500815008550081
4000245008937500001146600017905008520902624001010160247800001016000080000504000001926032111050126500855008007032324001020800001600002080000320000500445007911240021109101080000160000800000108000002980031003280000612535000150272294201784420205009300701480000320000105014650045500455014650045
40002450078375000000323100142050081009312624001010160263800001016000080000504002081930261211050066501455014401033232400102080000160000208000032000050044501441124002110910108000016000080000010800000080031003280031610350001502725104211784423215027020741480000320000105008150097500815004550090
4000245007937500010027700000501300991726240358101605158000010160000800005040020419112492110500745008450044003512400102080000160000208000032000050188500441124002110910108000016000080000010800000298003100328000061313500015031281152117106523215004110761080000320000105004550085500815004550077
40002450084375000000521000128150081210100262400101016053280000101600008000050400192191124911105007350089501880523232400102080000160000208000032000050096500831124002110910108000016000080000010800000080000003180000010000115027124421179442222500521081680000320000105014250134500805008750100
40002450087375000000350002470500291101002624032210160165800001016000080000504001261911249215500685010050044029366240010208000016000020800003200005008650044112400211091010800001600008000001080000023800260028800006019000015027205522177542425500410087680000320000105016450085500955007650102