Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (4S)

Test 1: uops

Code:

  ld2r { v0.4s, v1.4s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.008

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
63005283282120240300002104973280811116318300420041000200010005000238665002275928217286723103000100020001000200028243282451161001100010000100003100300031003103001374210222724832781266194733186381114626827889139441266314079100020002826728583286552831628369
63004283502110210190002510498228090111627430082004100020001000500023864500227842827028731310300010002000100020002833028275116100110001000010000210000001100101200136759601710933651157200183264381911585427991144521269913718100020002858628530287162852028425
63004283192140210250004105093280111116311300020041000200010005000239005002276128363282513103000100020001000200028272282841161001100010000100003100300001000133001388810221716833521156198223391381413605427972146111229013626100020002858928510282692823628652
63004286912130270200002105027281590116221300220081000200010005000238845002273628294283903103000100020001000200028401285591161001100010000100243100300021001223121394610249702233001860197083297381023646527935141311244013964100020002830728421282652838528390
63004285592131291211003105021280150116271300620061000200010005000238785002273428302282373103000100020001000200028459281121161001100010001100223100200121001222121381110040715433181159195663356381112495427887146791301813631100020002827828410285112864828636
63004285122151221281004005081281730116542300620061000200010005000238685002274528225283093103000100020001000200028401283081161001100010000100220100300121001223111353110050695532831164196463173381619505627894144411274713868100020002850328359281432829228704
63004283192111291241003104872280790116352300820081000200010005000238932082273628239283693103000100020001000200028421286851161001100010000100333100400211000212101385610129709333491154195553202381115626027946149911301813985100020002850828561286292841128336
63004285642121291301004105147282750116469300820081000200010005000238703002275028304284403103000100020001000200028487283511161001100010000100222100400211000222121364910054704532901461196283296381916565528011142881303213733100020002834928383283162835428311
630042848621413012810013104997280511016140300620061000200010005000238820002276728167282513103000100020001000200028340282671161001100010001100223100300111000213111390710081707931421254197553355381519576327967149101264213721100020002873028350284742816628362
63004284832111242251001300501528167001624930022008100020001000500223868708227502837628338310300010002000100020002815028141116100110001000010022310010115100022311138199900714733781048199983287381713545028019143261255313770100020002878928434284582838928618

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.4s, v1.4s }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
702051400531049000000201014003513943113934312934125801004010030000100003010030000100001264304669382920080505114003201400411400561305640313115270100302001000030000602001000030000140041140041115020110099100401001000010000010010000001000010110002110000321011531113956340000966100002000040100140051140051140051140051140051
702041400351049000000100014003513943113932512935625801034010030003100003010030000100001264342669353520079451114001101400501400471305580313114670100302001000030000602001000030000140035140047115020110099100401001000010000010010000011000000010000100000321011281113956340000966100002000040100140051140051140051140051140051
7020414005010480000273101014003213940413933812934125801034010030000100003010030000100001264342669353520079451114002601400581400511305310313111870100302001000030000602001000030000140047140035115020110099100401001000010000110010000011000000010000010000321011331113954440000006100002000040100140048140048140036140048140051
702041400501049100000001014003513940413935412938225801034010030000100073010030000100001264342669279120082449114002601400501400351305310313114670100302001000030000602001000030000140050140035115020110099100401001000010000010010000001000000010000110000321011281113955940000606100002000040100140051140036140051140036140051
702041400351049000000000014003513953413932512935325801034010030000100003010030000100001264342669279120081263114002601400471400471305580313112670100302001000030000602001000030000140047140047115020110099100401001000010000110010000011000000310000110000321011281113954440000966100002000040100140051140051140036140048140036
702041400501049000000000014003513943113934312935625801004010030003100003010030000100001264337669338820081263014002301400351400501305310313112670100302001000030000602001000030000140035140035115020110099100401001000010000110010000011000000010000110000321011331113956340000066100002000040100140051140048140036140036140036
7020414003510490000000000140020139534139343129356258010340100300031000030100300001000012643426693388200812630140026014004714004713055803131146701003020010000300006020010000300001400351400471150201100991004010010000100001100100000010000003610000110000321011281113956740000666100002000040100140051140051140036140036140051
702041400551049001000000014003513953413932512935625801034010030003100003010030000100001264342669353520081263014008501400471400471305580313114670100303861000030000602001000030000140062140035115020110099100401001000010000010010000011000000010000100000321011281113954440000966100002000040100140048140036140051140051140036
702041400501049000000400014002013953413934312934125801004010030003100003024930000100001264310669279120080027014001101400351400501305310313112670100302001000030000602001000030000140050140047115020110099100401001000010000010010000001000000010000100000321011331113956340000906100002000040100140051140051140051140051140036
702041400501049000000101014003513943113934312934125801034010030003100003010030000100001264310669338820081263014002601400351400351305310313114670100302001000030000602001000030000140050140047115020110099100401001000010000010010000011000000010000110000321011331113954440000966100002000040100140051140051140051140051140051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0057

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cd | cf | d0 | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140057104911110021001400421396141393521293472580016400103000610000300103000010000126475666940222008036101400360140060140060130578313118570010300201000030000600201000030000140064140057115002110910400101000010000010100032010001004100001111001314002012101113958240000131013100002000040010140061140061140061140061140061
70024140041104911111120001400451396141393321293652580016400103000610000300103000010000126475666940222008036111400360140060140057130578313118570010300201000030000600201000030000140044140057115002110910400101000010000010100021010002630110000010100031400131210121395794000010010100002000040010140042140061140061140061140058
700241400601049110100200014004213961413935112934725800164001030003100003001030000100001264794669402220083186014003601400601400571305783131167700103002010000300006002010063300001400461400591150021109104001010000100000101000221100010211000011110003140010120011139563400000130100002000040010140061140061140061140058140061
7002414005710491101001000140028139611139332129365258001640010300061000030010300001000012647946693602200827451140033014005714005913057531311677001030020100003000060020100003000014006014005711500211091040010100001000001010001111000200110000010110031400171210111395794000013100100002000040010140042140061140042140058140042
700241400571049111000200014004213961113933212936525800164001030006100003001030000100001264756669309120082745114003301400571400571305783131232700103002010000300006002010000300001400481400831150021109104001010000100000101000120100020011000011110003140011121011139582400000130100002000040010140058140061140061140042140058
7002414005710491010012001140045139614139352129363258001340010300061000030010300001000012647946694022200827451140017014006014006013057831311827001030020100003000060020100003000014005714005711500211091040010100001000001010002301000301110000111100031400141200111395824000001010100002000040010140058140061140061140058140058
700241400411049101100131001400421396111393321293652580016400173000610000300103000010000126475666940222008274501400360140060140060130578313116770010300201000030000600201000030000140061140057115002110910400101000010000010100013110002021100001111000314001312001113957940000101013100002000040010140061140061140061140042140061
70024140057104911111111001400261394101393321293652580029400103000610000300103000010000126479466940222008347411400360140041140060130559313118570010300201000030000600201000030000140060140057115002110910400101000010000010100011010002001100001101100314001212001113958240000131010100002000040010140061140061140061140061140061
700241400601049111000210014004513961413935212936525800164001030003100003001030000100001264794669309120082745114003601400601400571305613131167700103002010000300006002010000300001400631400571150021109104001010000100000101000130100020011000001111003140012121012139582400000013100002000040010140062140061140164140058140061
700241400601049100111210014002613949013933212936325800164001030003100003001030000101981264794670146520082745014001701400601400601305783131185700103002010000300006002010000300001400611400571150021109104001010000100000101000121100030011000011110003140015120011139582400000100100002000040010140042140061140061140058140042

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.4s, v1.4s }, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0682

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140701105511111102010114066714027613997212976225801064010030006100003010030000100001269964672382120172718114051314068214068213118703131777701003020010000300006020010000300001406821406821150201100991004010010000100000100100011110001011100001111200324811291113977240000101010100002000040100140261140261140261140261140261
7020414026010500000000101001406671402761397471299852580106401003000610000301003000010000126994667238212017257401404341406821406821311870313177770100302001000030000602001000030000140682140682115020110099100401001000010000010010002111000200110000111110032101129111401924000010010100002000040100140683140459140683140683140683
70204140458105410110002010114066714032713997412998525801064010030006100003010030000100001269946672382120172574114065814068214068213118703131777701003020010000300006020010000300001406821406821150201100991004010010000100000100100021110002001100001111100321011291114019240000101010100002000040100140683140683140683140683140683
702041406821052101100074010114066714027613997212998525801064010030006100003010030000100001269946672382120172574114043414068214045813118703131777701003020010000300006020010000300001406821406821150201100991004010010000100000100100021110001001100001111200321011291114019240000101010100002000040100140683140683140683140683140683
70204140682105411000002010114066714027713997212998625801064010030006100003010030000100001269946672382120172574114065814068214068213118703131777704873020010000300006020010000300001404581404581150201100991004010010000100000100100021110002014100001101100321011291114019240000101010100002000040100140683140683140715140293140683
70204140682105411010002010114066714027613997212998525801034010030003100003010030000100001268087672382120140295014043414068214068213118703131777701003020010000300006020010000300001406821406821150201100991004010010000100000100100032110002011100001111000321011292114021540000101010100002000040100140683140683140683140459140683
70204140458105411110002000114066714027613997212998525801064010030010100003010030000100561270054672415720172574014065814068214045813118773131355701003020010000300006020010000300001402601402601150201100991004010010000100000100100000110000000100001010000321011331113977240000101010100002000040100140037140037140261140261140261
702041402601051000000013010014002113977013955212956525801034010030003100003010030000100001266232669283920079595014023614003614026013076703131355701003020010000300006020010000300001402601402601150201100991004010010000100000100100000110000003100001010000321011291114019240000101010100002000040100140683140683140683140683140683
70204140682105410110002010114044314027613997212976225801064010030006100003010030000100001269946671306920172574014065814057614071213118803131777701003020010000300006020010000300001406821406821150201100991004010010000100001100100011110001011100001111100321011290114019240000101010100002000040100140683140459140683140683140459
70204140682105410100002010114066714027613997212998525801064010030006100003010030000100001269946672382120172574014065814068214068213096403131777701003020010000300006020010000300001406821406821150201100991004010010000100001100100032110003021100001111100321011291114019240000101010100002000040100140684140683140683140459140683

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0260

retire uop (01) | cycle (02) | 03 | 09 | 0e | 1e | 22 | 23 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140260105111110140245139488139552129342258001340010300031000030010300001000012647286703613201119300140236140261140260130777031313847001030020100003000060020100003000014026014026011500211091040010100001000001010000110000027100001100003140112211139781400001000100002000040010140261140261140261140261140261
70024140260105100110140245139716139326129565258001340010300031000030010300001000012692346692839201119301140890140262140261130779031313847001030020100003000060020100003015914026014026011500211091040010100001000001010000110000001000001000031401122111397814000010010100002000040010140261140261140261140261140261
70024140260105000610140872139716139552129342258001340010300001000030010300001000012724946692839201119300140236140262140260130554031313847001030020100003000060020100003000014026014026011500211091040010100001000001010000110000001000011000031401122111404054000013100100002000040010140261140037140261140261140261
70024140260104900110140021139488139552130189258001340010300001000030010300001000012647286703613201119301140236140037140260130554031320117001030020100003000060020100003000014026014003611500211091040010100001000001010000010000001000011000031401122111395584000001010100002000040010140261140261140037140888140261
70024140260105100100140245139488139552129565258001340010300031000030010300001000012666626692839201119301140236140261140260130777031311627001030020100003000060020100003000014026014003611500211091040010100001000001010000110000001000010000031401122111397814000001010100002000040010140261140261140261140261140037
700241403131049000101402451397161401751295652580013400103000310000300103000010000126472867036132011193011400121402621402601307770313116270010300201000030000600201000030000140260140260115002110910400101000010000010100001100000310000110000314021221213955840000101010100002000040010140037140261140261140261140261
70024140260105000110140021139716139353129382258001340010300031000030010300001000012647286703613201119301140236140262140036130777031311627001030020100003000060020100003000014026014026011500211091040010100001000001010000110000001000000000031401122111397814000010010100002000040010140261140037140888140261140037
700241402601050001001402451397161395521295652580013400103000010000300103000010000126472867036132011193011402361400431402621307770313116270010300201000030000600201000030000140260140036115002110910400101000010000010100001100001010000110000314011221113978140000101010100002000040010140888140297140261140261140265
70024140266104900110140021139488139552129565258001340010300031000030010300001000012666626692839200795951140236140038140260130777031313847001030020100003000060020100003000014003614026011500211091040010100001000001010000110000001000011000031401122111397814000001010100002000040010140261140261140037140261140261
7002414026010511011014024613971613955212956525800134001030003100003001030000100001266662669283920111930114023614026514026013077703131384700103002010000300006002010000300001402601400361150021109104001010000100001101000011000000100001100003140112011139558400000100100002000040010140888140261140261140037140037

Test 4: throughput

Count: 8

Code:

  ld2r { v0.4s, v1.4s }, [x6]
  ld2r { v0.4s, v1.4s }, [x6]
  ld2r { v0.4s, v1.4s }, [x6]
  ld2r { v0.4s, v1.4s }, [x6]
  ld2r { v0.4s, v1.4s }, [x6]
  ld2r { v0.4s, v1.4s }, [x6]
  ld2r { v0.4s, v1.4s }, [x6]
  ld2r { v0.4s, v1.4s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5009

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
240205400693001110006801034005420952524018410016008480000100160000800005004157685440750140050400474006999780310027240100200800001600002008000016000040069400691180201100991100100800008000010080016164508005811159800426116451605110316234006613132800001600001004007040070400704004840070
240204400693001011106701034005409752524012410016002480000100160000800005004192045440750040050400694006999780310027240100200800001600002008000016000040069400471180201100991100100800008000010080017184508005810220800416158451625110216234006613132800001600001004007040070400704007040070
24020440069300100100670100400540975252401241001600848000010016000080000500419204544075014005040069400699973031002724010020080000160000200800001600004006940069118020110099010010080000800001008001717450800160006180042605801615110316234004413130800001600001004004840070400704007040070
240204400693001100006700034003209052524012410016002480000100160000800005004192165440750140050400694006999730310027240100200800001600002008000016000040069400471180201100991100100800008000010080016184508005800120800426116451615110316334006613130800001600001004007040070400704007040070
240204400693001011006801004005430902524018410016002480000100160000800005004191835440680140050400694006999780310005240100200800001600002008000016000040047400471180201100991100100800008000010080016164508005910061800426157451615110316334006613130800001600001004007040070400484007040048
24020440069300101000200102400543995252401901001600848000010016000080000500418062544068014005040069400479978031002724010020080000160000200800001600004006940047118020110099110010080000800001008001618450800571026180042615845161511031633400660130800001600001004007040070400484007040048
24020440069300111000670102400543995252401901001600848000010016000080000500419216544075014005040069400699978031002724010020080000160000200800001600004004740069118020110099110010080000800001008001717450800580006180000615701625110316334006613130800001600001004004840048400704007040070
24020440069300110100200003400543995252401901001600848000010016000080000500419216280003014005040069400699978031002724010020080000160000200800001600004006940069118020110099010010080000800001008001717450800570006280041015701605110316334006613130800001600001004007040070400704007040070
240204400693001111006700034003239752524018410016008480000100160000800005004192045440750040050400694006999780310027240100200800001600002008000016000040069400691180201100991100100800008000010080017174508005810165800426157451605110316334006613131800001600001004007040048400484007040070
24020440069300111000680102400540995252401241001600848000010016000080000500419183544075004005040069400699978031002824010020080000160000200800001600004006940069118020110099110010080000800001008001617008005810220800416157431605110316334006613130800001600001004007040070400704007040070

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 18 | 1e | 22 | 24 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24002540055300100011400400401111025240064101600548000010160000800005041874539200140400364004240055999603100352400102080000160000208000016000040055400551180021109101080000800000108000030800240248002461030502022162318400526680000160000104005640056400424005640056
2400244004129900004200400401100252400641016005480000101600008000050418745184567414003640055400629996031002124001020800001600002080000160000400554005511800211091010800008000001080000308002402480024612405020191623154005210680000160000104005640056400564005640056
240024400413000000301040040000252400101016005480000101600008000050418518184567404003640118400559996031002124001020800001600002080000160000400414005511800211091010800008000001080000308002402480024602430502017162418401656680000160000104005640056400564005640056
24002440055300000030104002611110252400641016000080000101600008000050418518392001414003640041400559996031003624001020800001600002080000160000400554005511800211091010800008000001080000308002402480024002430502014162317400526680000160000104005640056400564005640056
2400244005530000000004004011110252400641016000080000101600008000050418518392001404004740055400559996031002124001020800001600002080000160000400564005511800211091010800008000001080000308002422780024612430502023162925400526080000160000104005640042400564005640056
240024400553000000010400401111025240064101600548000010160000800005041874539200141400364005540055999603100352400102080000160000208000016000040055400551180021109101080000800000108000008002402480024012430502023162323400386680000160000104005640056400424005640056
2400244005529900003010400401111025240064101600548000010160000800005041874518456741400224005540047999603100352400102080000160000208000016000040041406461180021109101080000800000108000008002402480000612430502023162318400526680000160000104005640056400424004240042
24002440055300010030104004011110252400641016005480000101600008000050418745392001414003640055400449996031003524001020800001600002080000160000400554005511800211091010800008000001080000308002402480024612430502022161723400520680000160000104004240042400424005640056
240024400413000000300040040111102524006410160000800001016000080000504187451845674040036400554006299960310035240010208000016000020800001600004005540041118002110910108000080000010800000800000080024602430502018162317400526680000160000104005640042400564005640056
24002440055300000030004004001102524001010160054800001016000080000504187453920014140036400434005599960310035240010208000016000020800001600004005540055118002110910108000080000110800000800240080024602430502023161823400520680000160000104005640044400564005640056