Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (single, B)

Test 1: uops

Code:

  ld2 { v0.b, v1.b }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.002

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.002

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6300528482214161400214989279311111630030042004100020001000500023881322801280592844431030001000200010004000282212799711610011000100001000031003001100121313812102887083338513471977834213812164444227988148791257412982100020002853728406282192852128292
630042836221416130031495828162011162943004200210002000100050002385442278328320284083103000100020001001400028313283821161001100010000100002100000310000031360010227722333686391960433283822144343227978147101297513493100020002817328113281762821828341
630042829721115120020517428125000162803002200210002000100050002381602278328356282933103000100020001000400028314281091161001100010000100002100010010011101357610111705333877351976333733820174237227944151911302513312100020002839628360283722840128554
630042850321310120021494728133010163573000200010002000100050002385822275128388284063103000100020001000400028299283851161001100010000100002100100110001131394710256722734109411966333493816123842327988143261263413086100020002825128278282182822928228
6300428472211151100415053281280011631130042002100020001000500023798822757284202829531030001000200010004000283672828611610011000100001000021006123100311213600999971063393537194793359380694442228001142731265813565100020002845628351283442851728339
630042827721215140070494528106111162653002200210002000100050002389322276828415283963103000100020001000400028243281961161001100010001100002100310010001131357310353706534009391955333373818124142228062145021297313422100020002819328154282552837628244
630042835221111120041501828161011162773004200010002000100050002381362276628227281693103000100020001000400028236284231161001100010000100000100000610001131391710164716934085371960233853815114046327980143141266813511100020002829128520284112846228381
63004283182111316004149322813410116144300220001000200010005000238895227622812428291310300010002000100040002838328314116100110001000010000210010011003113137959939724333798401949233943817204138228051139821256913635100020002818228449285932830728195
63004282962131315004150072808700116211300420041000200010005000238908227532831328298310300010002000100040002820028248116100110001000010000010011031003030140559997708533985361955134013813124339327892150431268814001100020002824228258282642832028429
630042853021216120001499628013111161133002200410002000100050002389752276828385285503103000100020001000400028482280481161001100010000100002100000110002121368810227720234227461948733973817114043227977148071231913655100020002822328347282932823328327

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.b, v1.b }[1], [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0047

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514004710490000001001010014069813970713933812935325801194013230041100023010030000100001264328670729820081263014002301400481400471305453131205701003020010000301606244010157518651402491400472150201100991004010010000100000100100000110000020100001100340231532213989940000666100002000040100140048140080140658140611140048
70204140047105001000000082010014003213943113933812935325801034010030003100003024930000100001264328669338820081263014002301400471400471305443131118701003020010000300006020010000500001400481400351150201100991004010010000100000100100000110000000100001100321021532213955940028666100002000040100140048140049140049140150140048
702041400471049000000000208000014002013943313933812934125801034010030003100003010030000100001264328669338820081263014009501400471400471305433131126701003020010000300006020010000500001400471400471150201100991004010010000100000100100060110000000100001100321021532213955940000666100002000040100140048140036140048140048140048
7020414004710490000000101010014003213943113933812935325801034010030003100003010030000100491264382669338820081263014002331400491400471305433131126701003020010000301586020010000500001400471400471150201100991004010010000100000100100000110001000100001100321021532213955940000666100002000040100140048140048140050140149140048
7020414004710490000000001010014003213943113933812935325801034010030003100003024930000100001264328669338820081263014002301400401400471305433131118701003020010000300006057410000500001400471400472150201100991004010010000100000100100000110000300100001100321021332213968040000666100002000040100140048140048140048140048140048
7020414004710490000000101010114003313947013933812935325801034010030007100003010030000100491265635669338820081263014002301400481400471305753131118701003020010000300006020010000502661400481400361150201100991004010010000100000100100000110000000100001100321021332213954440024066100002000040100140048140048140048140048140048
7020414004710490000000001010014003213943113933812935325801034010030003100003010030000100001264328669338820081263014003101400471400471305433131126701003036010000300006020010000500001400471400471150201100991004010010000100000100100000110000000100001000321021532213955940000666100002000040100140080140080140051140048140048
702041400471049000000000100001400321394311393381293532580103401003000310000301003000010000126432866933882008126301400230140047140047130572313111870100302001000030000602001000050000140047140050115020110099100401001000010000010010000011000041183100001100321021532213955940000606100002000040100140048140051140048140036140048
702041400351049000000100528810014003213943113932512935325801034010930003100003010030000100001264328669338820081263114002301400471400471305433131120704413020010000300006020010000500001400471400491150201100991004010010000100000100100030110000000100001120321021532213955940000666100002000040100140048140052140048140144140048
70204140035104900000110013010014003213943113933812935325801034010030003100003010030000100001264328669338820081263014002301400471400471305433131126701003020010000300006020010000500001400471400471150201100991004010010000100001100100000110000000100001100321021532213955940000666100002000040100140054140050140048140048140048

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0049

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cd | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140053104900110010100140035139491139343129356258001340010300031000030010300001000012647716693388200812630014002614005014005013056831311757001030020100003000060020100005000014005014009611500211091040010100001000001010000110000431000010100319000051210002413986240000969100002000040010140051140051140151140051140036
70024140047104900000010000140035139491139325129356258001040010300001000030010300001000012647716693869200794510014001114003514005013055331311757001030020100003000060020100005000014005014003511500211091040010100001000001010000110000001000000100314000021210002413957440000069100002000040010140036140051140036140051140048
70024140047104900000000000140035139446139343129341258001340010300031000030010300001000012647716693535200812630014001114005014003513056531311617001030020100003000060020100005000014005014004711500211091040010100001000001010000110000001000010100314000021210002413957240000960100002000040010140048140048140036140051140051
70024140035104800010010100140035139448139373129342258001040010300031000030010300001000012647716692791200794511014002614003514005013056831311617001030020100003000060020100005000014005014003511500211091040010100001000001010000110000101000000000314000021210002213956940000006100002000040010140086140051140048140110140083
70024140050104900000000000140035139492139325129356258001040010300031000030010300001000012647716693535200794510014002314003514004713056831311757001030020100003000060388100005000014004714005811500211091040010100001000001010000110000031000010100314000021200003213957240000909100002000040010140036140036140051140051140051
70024140047104900000010000140020139491139344129356258001340010300001000230010300001000012651226692887200812630014002614003514004713056831311757001030020100003000060020100005000014005014004711500211091040010100001000001010000110000053851000010100316800021210003513986440000969100002000040010140051140037140051140154140051
70024140035104900000000100140035139446139343129356258001040010300031000030010300001000012647196693535200816970014002614005014004713056831311757001030020100003000060388100005000014005014004711500211091040010100001000001010000110000001000010100314000021210004313955740000990100002000040010140051140051140051140051140036
70024140035108000000010100140020139491139325129356258001340010300031000030010300001000012647486692791200816970014001114005014003513056831311757001030020100003000060020100005000014005014003511500211091040010100001000001010000110000101000010000314000021210002213955740000999100002000040010140051140051140036140051140036
70024140035104900000010000140035139487139325129356258001040010300001000030010300001000012647716693535200812630014001114004714004713056531311737001030020100003000060020100005000014005014004711500211091040010100001000001010000110000001000010000314000021200003213957040000099100002000040010140036140051140051140051140081
7002414003510480010000000014003513944613933812935358800104001030003100003001030000100001264719669358320081263001400231400501400501305533131175700103002010000300006002010000500001400471400351150021109104001010000100000101000011000000100001000031400002170003213957240000909100002000040010140036140048140051140048140051

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.b, v1.b }[1], [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0458

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 23 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
702051408871054100100071001140670139866139974129987258010640100300061000030100300001000012699626713069201725740140434014068214068513096439131777701003020010000300006020010000500001404581404581150201100991004010010000100000100100021110001011100001111000321031291114019240000131013100002000040100140459140686140686140459140683
7020414068210521100010110001404431398661399741297622580106401003000310000301003000010000126808767238212017257401406610140685140685131190313177770100302001006430000602001000050000140458140458115020110099100401001000010000010010002211000202110000010110032101129111401954000013130100002000040100140459140683140683140686140683
70204140685108812010002000014067014033113997512977525801064010030012100003010030000100001268087672382120172574014043401404581406821309643131553701003020010000300006020010000500001406821407461150201100991004010010000100000100100021110001211100001111100321011291114019540000101010100002000040100140683140686140459140459140686
7020414045810531100000200001406701402791399741299872580106401003000610000301003000010000126810567239652017301511406580140458140685131190313178070100302001000030000602001000050000140458140682115020110099100401001000010000010010001211000211110000111100032101129111401954000013013100002000040100140683140459140683140683140686
7020414045810511100110210011406701402791399741299872580106401003000310000301003000010000126808767239652017257411404340140458140458131187313177770100302001000030000602001000050000140458140711115020110099100401001000010000110010003111000200410000111100032101171114019440000131313100002000040100140686140686140459140686140686
7020414045810541101000200001406751402761399721299872580106401003000610000301003000010000126996267239652017257411406610140685140685130964313178070100302001000030000602001000050000140685140682115020110099100401001000010000010010001101000201410000011100032101129111401954000001013100002000040100140459140686140686140459140686
7020414068210521101000100001404431402761399741299852580103401003000610000301003000010000126994667238212017257411406610140682140685130964313155370100302001000030000602001000050000140682140682115020210099100401001000010000110010002301000101110000111110032101129111399694000013130100002000040100140686140686140459140686140683
7020414068210541101000700001404431402791399741297632580106401003000310000301003000010000126996267130692017257401406610140458140685131190313177770100303891000030000602001000050000140685140682115020110099100401001000010000010010003201000158111000011111003210112911139969400000013100002000040100140459140686140459140683140459
7020414068510531100010210001406751402791399721299852580106401003000610000301003000010000126996267239652014029501406580140685140458131190313178070100302001000030000602001000050000140458140682115020110099100401001000010000010010002111000101110000111100032101129111401954000013013100002000040100140686140686140687140459140692
70204140458105210011002100014044314027913997412998725801034010030006100003010030000100001269962672396520140295114043401404581406851311903131780701003020010000300006020010000500001404581406821150201100991004010010000100001100100012010002021100001111000321011291114019540000131313100002000040100140459140459140686140459140459

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0301

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140594105200000000001014024413986213975312956425800104001030003100003001030000100001266650670356420141197111404400140259140259130776313116270010300201000030000603941000050000140464140069115002110910400101000010000110100000110000000100001010031400051215413978040000969100002000040010140260140465140037140260140260
70024140259105200000000100014024413948813975312956425800104001030000100003001030000100001264728669283920111784001402350140259140036130554313138370010300201000030000600201000050000140464140036115002110910400101000010000110100000110000000100001010031403041215413998440000906100002000040010140037140260140037140465140260
70024140096105200000000101014044913948813955112956425800134001030003100003001030000100001268532671336620079595001402350140464140036130554313116270010300201000030000600201000050000140464140036115002110910400101000010000010100000110000000100001010031400041204513955840000999100002000040010140260140260140465140260140465
70024140259105200000000101014024413948813975312956425800104001530003100003001030000100001266650671336620111784011400120140036140464130554313116270010300201000030000600201000050000140464140464115002110910400101000010000010100000010000000100000010031400061224413978040000009100002000040010140465140037140465140465140465
70024140036105300000100001014002113970013955112934225800134001030003100003001030000100001266650670356420111784011402350140259140036130980313116270010300201000030000600201000050000140259140259115002110910400101000010000010100000110000000100001010031400041224413955840000996100002000040010140260140037140260140037140260
70024140259105200000000001014002113970013955112998325800134001030000100003001030000100001264728670356420079595011402350140036140259130776313158970010300201000030000600201000050000140036140259115002110910400101000010000010100000110000000100001010031400061225413978040000009100002000040010140260140037140037140465140260
70024140259104900000100601014002113970013955112976825800104001030003100003001030000100001264728670356420111784011402350140263140259130554313116270010300201000030000600201000050000140259140464115002110910400101000010000010100000110000000100001010031400041225413955840000000100002000040010140465140260140260140465140465
70024140036104900000000101014024413970013975312976825800134001030000100003001030000100001268532671336620141197001402350140259140259131195313116270010300201000030000600201000050000140036140036115002110910400101000010000010100000110000000100001000031403061224413978040000069100002000040010140037140260140465140260140037
700241404641052000000001000140244139700139326129564258001340010300031000030010300001000012647286703564201117840114023501402591402591309803131383700103002010000300006002010000500001402591404641150021109104001010000100000101000001100005120100001010031400061225413998540000060100002000040010140260140260140037140260140348
70024140259104900000100000014024513948813932612956425800134001030000100003001030000100001266650669283920111784001402350140036140259130776313116270010300201000030000600201000050000140036140464115002110910400101000010000010100000110000000100001000031400041225413955840000990100002000040010140037140465140037140260140260

Test 4: throughput

Count: 8

Code:

  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.b, v1.b }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.6263

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 23 | 37 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd alu (9a) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400205501303750000250109315012621010002624023610016016180000100160000800005004000681923081150048501145010203436324010020080000160000200800003200005011750103112402011009910010080000160000800000100800000238002600268002661200015110116115013210100800003200001005007350073500765006850068
4002045010237600003200040205005221000034240288100160189800001001600008000050040006819272231501865010850067034343240100200800001600002008000032000050067501011124020110099100100800001600008000001008000002380025000800200102901511011611501050006800003200001005010950068501035011950157
40020450149376000068630810160505007121010002624070010016018880000100160000800005004001351916967150149500675018003536524010020080000160000200800003200005008950075112402011009910010080000160000800000100800000080000002680000601929015110116115011610100800003200001005012350148501385007950068
400204500723750000001095050087200150262403141001603318000010016000080000500400059191696705010750078500670035424010020080000160000200800003200005010350090112402011009910010080000160000800001100800000080026000800266126001511011611501050000800003200001005011550068500685010250104
40020450103375000032000171005012200041026240314100160396800001001600008000050040012619239920500595008650086211433224010020080000160000200800003200005015250075112402011009910010080000160000800000100800000080019004680019612029015110116115006900100800003200001005014150150501075011550068
4002045006737500000000317050088210000262401001001604408000010016000080000500400127192364415008350113501080383242401002008000016000020080070320000501035007811240201100991001008000016000080000010080000023800000008001901192901511011611501051006800003200001005006850102501535006850120
40020450067375000031000205050052010101502624010010016000080000100160000800005004000671916967150121500675010800324240100200800001600002008000032000050101501181124020110099100100800001600008000011008000002380000002680025002529015110116115010510100800003200001005011950117501235010450068
40020450102375000032000595050125210061026240350100160000800001001600008000050040006519237461500595006750230041365240100200800001600002008000032000050100500891124020110099100100800001600008000001008000002380000000800266019001511011611500831000800003200001005010250068501095010950102
4002045010237600003100045415012921010002624010010016034680000100160000800005004001261921642150089501955010101136424010020080000160000200800003200005009550112112402011009910010080000160000800000100800000080026000800266102901511011611500991006800003200001005010350108500685007850087
40020450103375101131000831501372100270262402651001602148000010016000080000500400126191971615004850119500670273602401002008000016000020080000320000501085019511240201100991001008000016000080000010080000023800260008001901252301511011611500640006800003200001005010050141501505007950068

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.6260

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 37 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd alu (9a) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400025501093750000000049012810500562101000262402471016023780000101600008000050400000191846021550060500685004402935024001020800001600002080000320000500795007911240021109101080000160000800000108000002380019000198001961202300150711744421784443315013700806080000320000105005350080500455005650045
400024501053750000000025010050064210100026240247101602378000010160000800005040006719184602155006050044500440113622400102080000160000208000032000050055500711124002110910108000016000080000010800000238001900020800196102300150281744341784449265008910866080000320000105008050080500805008050080
400024500793750000000032005605006421010002624001010160237800001016000080000504000001911249215500605007950079003342400102080000160000208000032000050079500791124002110910108000016000080000010800000238001900019800196119000150281744351784417395015300866080000320000105004550045500455010750045
400024500793750000000015700519450088210100090240010101601798000010160000800005040005919184582155003650079500440353582400102080000160000208000032000050079500791124002110910108000016000080000010800000238001900019800196119230015030205533171044243050131001066080000320000105008050080500805008050084
400024500723751010000025005605004020000262402471016023780000101600008000050400000191846011550060500555007901935824001020800001600002080000320000500795009811240021109101080000160000800000108000002380020000080020012000015030205434171055362350266001066080000320000105007450053500805010650072
40002450055374000000002501216050064210100026240291101600008000010160000800005040005719112491155004750055500440353582400102080000160000208000032000050079500441124002110910108000016000080000010800000080019000080019001923001503020553517844353450080001066080000320000105008050080500805008050064
400024500443750010000025011913500292100002724001010160101800001016000080000504000681918460215500545008350052035346240010208000016000020800003200005007950071112400211091010800001600008000001080000023800200001980020012023001502820452017105516345015400860080000320000105004550080500805008050080
400024501063750000000025002700501582101000262402471016023780000101600008000050400059191124911550025500795007900358240010208000016000020800003200005007950079112400211091010800001600008000001080000023800200002080000612023001502817443617845263050065101060080000320000105008050080500805004550045
400024500793750000000025017705002921010002624016410160093800001016000080000504000591920235115500605004450065035358240010208000016000020800003200005006850071112400211091010800001600008000001080000023800190002080019602623001502820108381713124303750197001316180000320000105005350069500805008050080
40002450064375000000002501121050064210100026240010101602378000010160000800005040000019148211155003350044500790032324001020800001600002080000320000500735005211240021109101080000160000800000108000002380019010080019602023001503020495231784427325035100866080000320000105008050080500805008050045