Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (16B)

Test 1: uops

Code:

  ld2r { v0.16b, v1.16b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.008

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.008

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
63005293012200117101901000301004562287890171823006200210002000100050002388400227530291692933631030001000200010002000291252910911610011000100001002201003000810002131113018907968183096966206303097381921575928271162661411615038100020002932129304292532923629314
630042922922001161018100004000045232880001711030082008100020001000500023901002278602918529329310300010002000100020002927129200116100110001000010042010030122100022011128629064686630631151205983131382116525328307163511407515116100020002924529278292372925129243
630042922321901171015100008000046662880201716230022002100020001000500023909002270532909529318310300010002000100020002914329122116100110001000010044210050021100101310129169193684330341150206263069381917575428372164091403715117100020002926429322293932936029217
63004293102190118001810000301004629288170171543008200810002000100050002389300227230291292933231030001000200010002000291252914211610011000100001001321004001110012131312814912468563090954206593185381911495328367163901379815109100020002922929311292892932329325
630042924021901171012100004000046272881501715730022008100020001000500023913002269402916329322310300010002000100020002914829077116100110001000010023010010025100002210128059153696830521155205473084381815585328313163341411114873100020002921429249292042924529298
63004292332190117101300000400004685287980171683008200810002000100050002389500226970291022928131030001000200010002000291102911011610011000100001003201001001110012131112921907268063031946206283192381815555628360164841397415229100020002933529287293232927829230
6300429283219011700171000034000046122872101710830082006100020001000500023893002271702919129275310300010002000100020002922029113116100110001000010021210030012100101311130089477688930511451206223100382413565228301164061404315015100020002932429273292242933129272
63004293532190116001600000300004667287590172873006200810002000100050002390602227550291042925031030001000200010022004295172935671610011000100001006421007010282110052231613253916768903053855207913070381834526528543160581389815074100020002952329594293312946729617
63004295822210119001920081105453610045292914701742530232010100520081006518324156022290002955929499501763014100420101002201029340295071016100110001000010033210020001100121311128769237684930781050206493085381920547428698161891400215040100020002972129300289502931929300
63004293602220123001700010102501004554291120175513014201610062000100050002389702228820295572985850201300010002000100020002912729563121610011000100001002121003000475410073231112947949067943007756208863102381640587128321162851391814893100020002952329766296972981529771

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.16b, v1.16b }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
702051400511050010000101000140036139608139344129341258010340100300031000030100300001000012643546693731200818430140027014005114005113055931311507010030200100003000060200100003000014005114005111502011009910040100100001000011001000000100000001000010100321021281113956440000101010100002000040100140053140052140052140052140036
70204140051104900010010100014003613940413934412935725801034010030000100003010030000100001264354669358420079451014001101400991400511305593131147701003020010000300006020010000300001400511400511150201100991004010010000100001100100030110000000100001000032101133111395644000010010100002000040100140036140052140052140052140052
70204140051104900000010100014003613940913932512935725801034010030003100003010030000100001264354669358420079451014001101400511400511305593131147701003020010000300006020010000300001400351400511150201100991004010010000100000100100000010000000100001010032101171113956640000101010100002000040100140036140055140053140039140052
7020414005110490000001010001400361395611393441293572580100401003000310000301003000010000126435466935842008184301400270140051140051130531313115070100302001000030000602001000030000140051140035115020110099100401001000010000110010000011000000010000101003210113311139544400001000100002000040100140052140052140052140097140083
702041400511048000000601000140020139561139344129341258010340100300031000030100300001000012643546693584200818430140011014003514005113053131311477010030200100003000060200100003000014005114005111502011009910040100100001000011001000001100000001000010100321011281113956440000101010100002000040100140052140052140052140036140052
7020414005110490000001010001400361395611393441293412580103401003000310000301003000010000126431066927912008184301400300140035140051130531313112670100302001000030000605801000030000140051140051115020110099100401001000010000110010000001000000010000101003210112811139564400000100100002000040100140052140052140052140036140052
70204140035105000000010000014003613940413934412935725801004010030000100003010030000100001266993669279120082304014001101400351400511305593131147701003020010000300006020010000300001400511400511150201100991004010010000100001100100000010000000100001000032101128111395664000013010100002000040100140036140052140052140052140036
702041400511049000000101000140036139404139344129357258010340100300031000030100300001000012643546693584200818430140027014005114003513055931311507010030200100003000060200100003000014005114003511502011009910040100100001000011001000001100000001000010000321011331113956440000101010100002000040100140054140038140052140052140055
702041400511049001000101000140036139561139325129357258010340100300031000030100300001000012643106693584200794510140027014005114005213055931311267010030200100003000060200100653000014005114005111502011009910040100100001000011001000001100000001000010000321011331113956440000101010100002000040100140052140052140052140052140052
70204140051104900110010100014003913956213934612934125801034010030003100003010030000100001264310669358420081843014001101400511400511305593131147701003020010000300006020010000300001400511400511150201100991004010010000100001100100000010000000100001010032101128111395644000010100100002000040100140052140036140052140036140052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0047

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7002514004710490000000010100140032139446139338129353258001340010300031000030010300001000012647486693388200812631140023140047140047130553031311737001030020100003000060020100003000014004714004711500211091040010100001000001010000010000010310000110314041202413956940000666100002000040010140048140036140048140048140048
7002414004710490000100010100140032139446139338129353258001340010300031000030010300001000012647486693388200812631140023140047140047130565031311737001030020100003000060020100623000014004914005011500211091040010100001000001010000110000000010000110314041204213956940000666100002000040010140048140048140048140048140048
7002414004710490000000010100140032139446139338129353258001340010300031000030010300001000012647196693388200812631140023140047140047130565031311737001030020100003000060020100003000014004714004711500211091040010100001000001010000110000000010000100314041204413956940000666100002000040010140048140048140048140048140048
7002414004710490000000010100140032139487139338129353258001340010300031000030010300001000012647486693388200812631140023140047140039130553031311737001030020100003000060020100003000014004714004711500211091040010100001000001010000110000000010000110314021202413956940000666100002000040010140048140048140048140048140036
7002414004710490000000040100140032139446139316129355258001340010300031000030010300001000012647846693388200812631140023140047140047130565031311737001030020100003000060020100003000014004714004711500211091040010100001000001010000110000000010000110314021204413956940000606100002000040010140048140048140048140048140048
7002414004710490000000010100140020139446139338129353258001340010300031000030010300001000012647196693388200812631140023140047140047130565031311737001030020100003000060020100003000014004714004711500211091040010100001000001010000110000000010000110314041202413956940000666100002000040010140048140048140048140048140048
7002414004710490000000010100140032139446139338129353258001340010300031000030010300001000012647486693388200812631140023140047140047130565031311737001030020100003000060020100003000014004714004711500211091040010100001000011010000110000000010000110314041204213962140000666100002000040010140048140048140048140048140048
7002414004710490000000010100140032139446139338129353258001340010300031000030010300001000012647486693388200812631140023140047140047130565031311737001030020100003000060020100003000014004714003511500211091040010100001000001010000110000010010000110314041202413956940000666100002000040010140048140048140036140048140048
7002414004710490000000010100140032139446139338129353258001340010300031000030010300001000012647486693388200812631140023140047140047130565031311737001030020100003000060020100003000014004714003511500211091040010100001000001010000110000000010000110314021202413956940000666100002000040010140048140048140048140048140036
7002414003510490000100010100140032139446139338129353258001340010300031000030010300001000012647486693388200812631140023140047140047130565031311737001030020100003000060020100003000014004714004711500211091040010100001000001010000110000000010000110314041204213956940000666100002000040010140048140048140048140048140048

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.16b, v1.16b }, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0260

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4e | 4f | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514026010510001100001014087213977014017512996302580103401003000310000301003000010000127205566928392011207401408631400361408871305323131983701003020010000300006020010000300001408871402601150201100991004010010000100001100100000110000000100001000032102129221397724000001013100002000040100140888140888140888140263140888
7020414026010500000100000014002114043914017513020702580103401003000010000301003014810000126431967336312007959501402361402601408871307673131127701003083810000300006020010000300001400361402601150201100991004010010000100000100100000110000001652710000000003210212922139772400001300100002000040100140888140037140888140037140261
70205140260105000000001010140021139770139326129342025801034010030003100003010030000100001266232670565020119522014086314045314088913139131319827010030200100003000060200100003000014003614026011502011009910040100100001000001001000001100002001000011000321021294214039640000131013100002000040100140888140888140888140889140038
702041402601050000000425000140245140439140175130189025801034010030000100003010030000100001266232673363120111930114023614093414030813057431311277010030200100003000061490100003000014026014026011502011009910040100100001000001001000001100000001000011000321021292314039640000131013100002000040100140888140037140888140261140037
70204140260105100000000010140021140439140175129342025801624010030003100003010030000100001264337673363120131329014086314044114088713053231319827010030838100003000060200100003000014088714026011502021009910040100100001000001001000001100000024100001100032102133221397724000013100100002000040100140888140888140037140888140888
70204140887105000010041010140245139405140175130189025801004010030018100003010030000100001266232669283920111930014001214088814102813139131311277010030200100003000060200100003000014003614003651502011009910040100100001000001001000020100000001000011000321041332213977240000101313100002000040100140888140037140888140037140889
70204140036105110000001010140293140439139326130189025801034010030003100003010030000100001272055673401520202004014086314026014026013139172131982701003020010000300006020010213300001400361402601150201100991004010010000100000100100000110000000100001100032102129221395454000013013100002000040100140892140037140888140888140687
70204140260105000100001235210140872139770140175130189025801034010030003100003010030000100001264319673363120111930014001214016114026013053231313557010030200100003000060200100003000014088714026011502011009910040100100001000001001000001100000001000011000321021292214039640000000100002000040100140888140037140889141032140888
7020414088710500000000100014002113940514017512934202580103401003000310000301003000010000127205567336312020200401400121408871408871305323131982701003020010000300006020010000300001408871402601150201100991004010010000100000100100000110000000100001100032102133221397734000001313100002000040100140888140888140888140888140037
70204140887104900000000010140872139770140175130189025801804010030000100003010030000100001266232673695220112074014086314004014088713053231319827010030200100003000060200100003000014088714026011502011009910040100100001000001001000000100000031000011000321021292213977240000131313100002000040100140888140888140888140037140888

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0887

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 23 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7002514053210500001031014087313950613956613018925800134001030003100003001030000100001272485673363120111930140867014003614088713128452813263980461351191143234927696561143434773143365142719321500211091040010100001000001010000010000001000010103140011120221401674000001010100002000040010140724140861140888140037140039
700241408871048100000014002113971713955212956525800104001030003100003001030000100001264728673363120079595140236014003614003713140131320117001030020100003000060020100003000014088714003611500211091040010100001000001010003010000001000010103140021222214040540005131313100002000040010140727140269140262140037140888
7002414003610510010183001400211397161395531295652580013400103000310002300103000010000127248567336312020200414086301408871401641314013132011700103002010000300006002010053300001408871400361150021109104001010000100000101000211000003100000000314002120221398324000013013100002000040010140106140889140261140888140888
7002414088710510000000140872139716140175130189258001340010300001000030010300001000012724856733631201119301408630140036140260131401313138470010300201000030000600201000030000140260140260115002110910400101000010000010100001100000010000101031400212022140405400080013100002000040010140932140071140262140888140037
70024140887105010000011400211404161401761301892580010400103000010000300103000010000127248567036132011193014001201400361408871314013132011703823002010000300006002010000300001408871400361150021109104001010000100000101000201000000100000010314002120221404344000010013100002000040010140705140914140900140262140888
700241400361051000000114087214039413937612934325800104001030000100003001030000100001272485669283920202004140012014088714003613140131311627001030020100003000060020100003000014003614026011500211091040010100001000001010000010000101000000003140021202213955840000101013100002000040010140747140888140888140037140037
700241408871049000000114087214039414017513018925800134001030000100003001030000100001264728669283920111930140863014088714088713140131320117001030020100003000060020100003000014003614003611500211091040010100001000001010000010000001000010103140021222213955840000101313100002000040010140952140263140037140037140888
700241400361050000010014002114039413955212956525800104001030003100003001030000100001272485673363120111930140012014026014003613077731311627001030020100003000060020100003000014088714026011500211091040010100001000001010002110000001000010103140021222214040540000000100002000040010140470140890140059140891140037
700241408871051000000014002113971613955212934225800134001030000100003001030000100001264728673363120079595140863014003614026013055431313847001030020100003000060020100003000014088714026011500211091040010100001000001010000110000001000010103140021202214040540000101313100002000040010140321140045140263140261140888
700241400361049001000114087214040314017513018925800134001030000100003001030000100001272485670361320101721140863014088714088713055431320117001030020100003000060020100003000014088714026011500211091040010100001000001010002010000001000010103140021222214040540000131313100002000040010140046140272140261140261140888

Test 4: throughput

Count: 8

Code:

  ld2r { v0.16b, v1.16b }, [x6]
  ld2r { v0.16b, v1.16b }, [x6]
  ld2r { v0.16b, v1.16b }, [x6]
  ld2r { v0.16b, v1.16b }, [x6]
  ld2r { v0.16b, v1.16b }, [x6]
  ld2r { v0.16b, v1.16b }, [x6]
  ld2r { v0.16b, v1.16b }, [x6]
  ld2r { v0.16b, v1.16b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24020540065300101100006300002400502181810252401241001600828000010016000080000500415605544008204004604006540065997303100232401002008000016000020080000160000400474004711802011009910010080000800000100800171741080016110608003761534116100511011611400620090800001600001004005640056400604005640056
240204400593000000000037000004004001801025240182100160080800001001600008000050041559927200340400280400474006599730310023240100200800001600002008000016000040065400471180201100991001008000080000010080000000800240002480024600300000511011611400560000800001600001004006640066400664006640066
240204400653001101000062000034003211800025240100100160054800001001600008000050041813939200140400400400594004199730310017240100200800001600002008000016000040055400551180201100991001008000080000010080017160080053100568003761534116100511011611400620990800001600001004005640056400564005640060
240204400413000000000000000040040018010252401781001600248000010016000080000500415588544008004002804006540047997403100232401002008000016000020080000160000400474006511802011009910010080000800000100800000300800240000800316131300000511011611400560060800001600001004006640066400484006640048
2402044004730011100000630000240032118100025240100100160054800001001600008000050041840339200140400360400554005599730310017240100200800001600002008000016000040055400551180201100991001008000080000010080016164108005310020801706024300000511011611400380600800001600001004004840066400484006640048
2402044006530010110000630100340050218110025240154100160054800001001600008000050041840339200140400220400554005599730399992401002008000016000020080000160000400414005511802011009910010080000800001100801491841080053102578000001534116000511011611400620060800001600001004006640066400664004840066
240204400653001111000062000014005021811002524015410016000080000100160000800005004181394560014040040040059400559973031001324010020080000160000200800001600004004140055118020110099100100800008000011008000003008003100031800246124370000511011611400520600800001600001004005640056400604005640056
240204400553000000000000100040044000002524017010016007080000100160000800005004184033920014040040040041400599973039999240100200800001600002008000016000040059400551180201100991001008000080000010080000030080024000248002460243700005110116114003800100800001600001004005640056400564004240060
24020440059300000000003700000400440111000252401001001600708000010016000080000500418403392001404003604005940059997303100132401002008000016000020080000160000400554005511802011009910010080000800000100800000300800240003180000602400000511011611400560060800001600001004006040056400604006040056
240204400553000000000000100040044011110025240170100160054800001001600008000050041813940000021400240400594004199730310013240100200800001600002008000016000040055400551180201100991001008000080000010080000030080024010278003101243000005110116244005601000800001600001004004640060400604006040056

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 24 | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cd | cf | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24002540059300110371040044101025240080101600008000010160000800005041902145600141400224005940059999631002124001020800001600002080000160000400414004111800211091010800008000001080000378003103180031600370502020160029154005601080000160000104004240060400604004240060
2400244005930000037104004410102524008010160000800001016000080000504190154560014140040400594005999963100392400102080000160000208000016000040059400591180021109101080000800000108000008003510800316131370502028160027124005610080000160000104006040060400424006040060
24002440059300000010400261010252400801016007080000101600008000050418518184567404002240041400599996310039240010208000016000020800001600004005940059118002110910108000080000110800000800310080031600370502027160012274005601080000160000104006040060400424004240043
24002440041300000010400261010252400101016000080000101600008000050418518184567414002240041400419996310039240010208000016000020800001600004004140059118002110910108000080000010800003780000031800000031005020181600281140056101080000160000104004240060400424006040042
2400244005930000137004004401025240010101600708000010160000800005041900918456741400404004140041999631003924001020800001600002080000160000400414005911800211091010800008000001080127378000000800000031005020291600181840042101080000160000104006040042400604004240060
24002440059300000370040026100252400801016000080000101600008000050419018456001414004040041400419996310021240010208000016000020800001600004005940059118002110910108000080000110800000800310202800316103705020281600172540056101080000160000104006040042400424004240060
2400244005930000049004004410025240080101600708000010160000800005041851845600141400224005940041100273100212400102080000160000208000016000040059400591180021109101080000800000108000037800310680031600005020271600231140056101080000160000104004240060400604006040042
240024400413000000104004401025240010101600708000010160000800005041900945600141400224005940041999631003924001020800001600002080000160000400594005911800211091010800008000001080000378003103180031610370502027160018214008901080000160000104006040060400604004240042
2400244004130000037004002810025240092101600708000010160000800005041900945600141400224005940059999631003924001020800001600002080000160000400414004111800211091010800008000001080000378003103180000613137050202716002711400560080000160000104004340060400424004240042
240024400412991003710400441002524008010160070800001016000080000504185181845674140040400594004199963100212400102080000160000208000016000040041400411180021109101080000800000108000037800000318003160037050201916002714400560080000160000104004240042400604004240060