Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (post-index, 2D)

Test 1: uops

Code:

  ld2r { v0.2d, v1.2d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.004

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.004

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
63005298132290160017011200046712893700174284012100020021000100020001000500050002390212022721292182934181040001000200020002000291482919611610011000100010034410020221000313001319895676873320894020684316938141840402864310001640713125144151000200010002942929362293392934529325
63004294522280210015011154000467428895111727540081001200410001000200010005000500023870612273729294294313294000100020002000200029182292361161001100010001000031001004301001313001324192866975317153720707323538101639402896510001634613076144731000200010002938129316293172930629304
63004294602280210015011100004684289951117295400610002006100010012000100050005000238845022700291522946731040001000200020002000293792929511610011000100010000210010011001213001317795536920320393420717325338111740402881210001616713181143601000200010002942829404294892939229403
63004294212270160020011210047652892911172874006100020041000100020001001500050002388970226852924629407101040001000200020002000291902935711610011000100010000310010001001203001332394616976314354020807338738151546442873910001617613158143821000200010002947129352293142940229331
6300429382228015001301140004580288800117194400610002010100010002000100050005000238985022728291122924731040001000200020002000292302916611610011000100010000310010011000212001312393706937310774020759329338151335352857410001633413110143991000200010002934729363292402935729229
63004294042270170014010300046792890610173454000100020061000100020001000500050002388050227142922129338310400010002000200020002915029115116100110001000100003100100010012130012934950468493111103920700313938171842342868210001634113284144861000200010002932529338294112933929289
63004293722280130023011400046832880211173024006100020061000100020001000500050002387217022751291792933931040001000200020002000292032920711610011000100010000310010011000213001310593816857314673920609322538021237362844210001614013353142361000200010002924429235292962931529388
630042934122801800160114000464628765111730940061000200610001000200010005000500023870170227332902329215310400010002000200020002918029106116100110001000100003100100110012130013081923369303108103620586319338061537362850010001624413255144091000200010002936329476292142919229318
63004292352260140015011400046322892110172454006100020061000100020001000500050002389612022705292662928231040001000200020002000293092922911610011000100010000310001041001312111292492096906308164020618304238122334412843110001631813337146931000200010002943129417293242937329318
63004292852201180018111200044972889610172604006100020061000100020001000500050002381250227112914529344310400010002000200020002922229156116100110001000100002100000010002020012763919568813037104320709308138152335362841710001652013371146271000200010002932629322293372935729303

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.2d, v1.2d }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0055

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140061108610001100200001400421113959725901065010030006100004010030000100001237053533461816116580140031140049140060130737313116480100302001000030000602002000030000140058140055115020110099100401001000010000010010000021000104100010110321011211113972550000666100002000050100140044140044140056140056140056
70204140055108500001000200001400401113958525901065010030012100004010030000100001237008533421016118010140031140061140055130732313115280100302001000030000602002000030000140055140055115020110099100401001000010000110010000011000104100001110321011211113972550000000100002000050100140056140044140045140056140056
702041400431085000000001400001400401113960025901035010030003100004010030000100001236999533374816116357142640140055140043130731313116180100302001000030000602002000030000140055140055115020110099100401001000010000010010000021000114100020110321011211113972550029676100002000050100140056140056140145140044140044
702041400551085000010001400001401331113961025901065010030006100004010030000100001237008533150916127292140093140043140055130731313123880100302001000030000602002000030000140057140043115020110099100401001000010000010010000001000107100020100321011211113972650000766100002000050100140058140146140056140044140059
702041400431085001000001300001400441113959725901065010030006100004010030000100001236972533419616122473140037140055140055130725313124480100302001000030000602002000030000140043140043215020110099100401001000010000110010001021000101100011110321011211113972550000006100002000050100140056140056140056140060140056
702041400551086000000002880011400281013959725901065010030006100004014530000100001237008533120316126688140031140055140043130719313115880100302001000030000602002000034364144033143064215020110099100401001000010000010010001021000101100001110321011211113972550000666100002000050100140144140056140056140056140044
702041400431086000011001880001400431113958525901035010030006100004010030000100401236999533261116115791140031140055140043130731313115880100302001000030000602002000030000140055140055115020110099100401001000010000010010000021000103206100011110321011211113972650000600100002000050100140056140056140056140056140056
70204140043108500001000200001400401113958525901065010030006100014010030000100001236999533128116120724140034140055140044130745313115880100302001000030121604922000030000140043140055115020110099100401001000010000010010000021000121100011110321011211113980350000606100002000050100140056140044140056140056140056
70204140043108500011001200001400401113959754901245010030003100004010030000100001236999533272116117892140031140161140049130731313116180100302001000030000602002000030000140055140057115020110099100401001000010000010010000021000111100011110321011211113978950000666100002000050100140444140157140319140236140243
7020414033210870000002318398176000140218101397371099016050139300111000540254304741004012528255337260161249621401881402231402471308082913132781019304471008030121604422016430119140296140233415020110099100401001000010000110010002021000804100001100321011211113971350000066100002000050100140056140103140044140044140057

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
700251400471085000000000130000014003513964725900135001030003100004001030000100001245907533328516114779014002301400501400501307493131209800103002010000300006032220000300001400951400501150021109104001010000100000101000001100001001000011031409873713971950000969100002000050010140048140051140051140054140141
700241400471085000000000130000014002013965325900295001030007100004001030000100001245847533270616114779014002601400471400501307813131211800103002010000301206002020000300001400921400431150021109104001010000100000101000001100001031000011031404876413971950000660100002000050010140048140051140048140048140146
70024140047108500000000010000014003213964725900255001030003100004001030000100001245907533328516115176014002301400501400351307463131194800103002011213330186002020000300001400481401215150021109104001010000100000101000001100001001000011031407875513970850000900100002000050010140056140051140096140048140052
70024140050112500000000110000014003513965054900135001030003100004001030000100001245907533321116114779014003231401461400501307523131209800103002010081300006002020000300001400471401181150021109104001010000100000101000001100010001000010031406873313972250000909100002000050010140051140051140051140051140036
70024140050108600100110100100014003513965025900135001030003100004001030000100001245847533328516115181014002601401391400501308253131211800103002010000300006002020000300001400501400351150021109104001010000100001101000001100002001000011031644874413975450113969100002000050010140052140053140051140051140051
70024140051108600100001011400100140035139650259001350010300031000040010300001000012458795333173161147790140023014005014005013082931312108001030020100003000060020200003000014005314004711500211091040010100001000001010000011000410310000112314078775139707500009610100002000050010140052140051140051140052140052
700241400501086001001000100000014008013965025900105001030000100004001030000100001245847533328516114779014002601400501400351308223131210800103002010000300006002020000300001400501400471150021109104001010000100000101000000100001031000010031404878513972250012909100002000050010140051140036140036140144140036
70024140050108500100000010010014003513965025900135001030003100004001030000100001245907533452416115176014002601400351400501307967131194800103002010000300006002020000301181401371401451150021109104001010000100000101000000100001032781000011031407876513972250000909100002000050010140051140036140051140051140051
7002414004710860000000001880000140032139650259001050010300031000040010301211000012459075333327161147790140011014003514005013075431312098001030020100003000060020200003000014014614004711500211091040010100001000001010000011000000010000110314058775139722500100610100002000050010140244141356141140140428140129
700241402411086101110033529880000140133139613259002950010300071000240010301181007812459075333285161195780142097014281414127313105541131291812063026410040304876050020242303631403201402424150021109104001010000100000101000421100061763231000311031403875513971950000006100002000050010140051140036140036140051140051

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.2d, v1.2d }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0066

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
702051400701086100001000014004813956525901005010030003100004010030000100001237096533410116119069014004201400661400551307313131178801003020010000300006020020000300001400631400611150201100990100401001000010000010010000001000000010000010032101801113973550000660100002000050100140064140105140067140064140064
70204140055108600000100014004813960825901035010030003100004010030000100001237096533324516116353014003901400631400631307393131158801003020010000300006020020000300001400551400551150201100990100401001000010000010010000011000000010000110032891801113973550000679100002000050100140075140064140068140056140068
702041400631125001000010140051139715259010350100300031000040100300001000012370965333551161178850140039014006314006313073931312078043730200100003000060200200003000014006514006611502011009901004010010000100000100100000010000000100001100321011211114005150000999100002000050100140056140064140064140064140064
7020414006610490000331611000140051139565259010350100300031000040100300001000012370965332803161163530140111014006314006313073931312078010030200100003000060200200003000014012214006611502011009901004010010000100000100100010110000003100001100321011211113973550000966100002000050100140064140056140064140067140056
70204140063104900000100014004013956525901035010030000100004010030000100001244902533380316127390014003901404041400631307313131158801003020010000300006020020000300001400551400661150201100990100401001000010000010010000011000000010000110032101801113973550000999100002000050100140064140064140064140064140064
70204140063104900000100014005113960025901035010030003100004010030000100001237771533076416116236014003901400631400631307393131158801003020010000300006020020000300001400631400661150201100990100401001000010000010010000011000000010000110032101801113973550000969100002000050100140066140111140068140070140067
702041400631086000002200014005113959825901035010030003100004010030000100001237114533334016116470014003901400631400631307313131210801003020010000300006020020000300001400641400701150201100990100401001000010000110010000011000000310000110032101801113973550000960100002000050100140064140067140064140067140067
702041400661085000006000140048139565259010350100300031000040100300001000012372225333517161168630140039014006314005513073931312078010030200100003000060200200003000014006314006111502011009901004010010000100004100100000110000103010000110032101801113973550000999100002000050100140068140056140064140064140064
7020414006310860000013000140228139565259010350100300031000040100300001000012370965334222161181250140041014006314010913074831312078010030200100003000060928200003000014006614006311502011009901004010010000100000100100000110000200100000100321011211113973550000969100002000050100140067140067140067140064140056
702041400631085000001000140049139567259010350117300001000040100300001000012376995337178161162360140039014006314006313076631311588010030200100003012060200200003000014006314006611502011009901004010010000100000100100000110000120100001100321011211113980350000969100002000050100140064140064140064140064140067

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0052

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140053108600011001000014002113965825900135001030003100004001030000100001245979533359116115517140030014006014005413075331312138001030020100003000060020200003000014005814005411500211091040010100001000001010000011000010010000110031400287321397305000013013100002000050010140059140059140059140037140510
7002414003610850001100100001405341398841679009250058300251000640719305961019612583985344866161558441400343140447140058130757313121780010300201000030000600202000030000140058140058115002110910400101000010000010100000110000106100001100314002872313973050000131310100002000050010140059140059140061140059140055
700241400541086000110010000140039139655259001350010300031000040010300001000012459795333591161155171400140140058140058130757313121780010300201000030000600202000030000140036140058115002110910400101000010000110100000110000000100001100314002873313973050000131013100002000050010140059140059140059140061140059
700241400541086000000010100140021139658259001350010300031000040010300001000012459795333591161157081400340140058140058130757313121780010300201000030000600202000030000140058140054115002110910400101000010000010100000110000000100001100314003873313972650000101313100002000050010140059140059140059140063140059
70024140036108500000001000014002113965825900135001030000100004001030000100001245979533274516113460140034014005814005413075731312178001030020100003000060020200003000014005814005811500211091040010100001000001010000001000010010000110031400387331397315000013013100002000050010140059140059140059140060140061
700241400581086000000010000140046139659259001050010300031000040010300001000012459795333630161157081400340140058140058130753313121780010300201000030000600202000030000140058140054115002110910400101000010000010100000110000120100001000314003873313973050000131013100002000050010140062140059140059140037140038
70024140058108600000001000014004313965425900135002030003100004001030000100001245979533274516117756140012014003714005813073531311958001030020100403000060020200003000014003614005811500211091040010100001000011010000011000010610000110031400387331397325000001310100002000050010140059140059140061140059140059
70024140058108600000101000014004613965825900105001030003100004001030000100001245979533370816115517140034014005814006113076031312138001030020100003000060020200003000014005814005811500211091040010100001000001010000011000010010000100031400387231397085000013130100002000050010140059140153140059140037140059
70024140054108600000001000014004313965825900135001030003100004001030000100391245979533274516115517140034014005414005813075731312178001030020100003000060020200003000014014914005411500211091040010100001000001010000011000000010000110031400310233139730500000014100002000050010140059140158140500140059140158
70024140244108711100231133880101402171397025290055500323001110003402983023710039125078253372521613054114024001402521402381308162713130080313308801078033147666122209433166142396142164231500211091040010100001000001010005211000402938510003110031400387231397305000013100100002000050010140037140037140037140037140037

Test 4: throughput

Count: 8

Code:

  ld2r { v0.2d, v1.2d }, [x6], x8
  ld2r { v0.2d, v1.2d }, [x6], x8
  ld2r { v0.2d, v1.2d }, [x6], x8
  ld2r { v0.2d, v1.2d }, [x6], x8
  ld2r { v0.2d, v1.2d }, [x6], x8
  ld2r { v0.2d, v1.2d }, [x6], x8
  ld2r { v0.2d, v1.2d }, [x6], x8
  ld2r { v0.2d, v1.2d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24020580041643100100001000018002610612253201628010016001480000801001600008000044082113758374982644508002280041800414992403499993201002008000016000020016000016000080041800412180201100991001008000080000010080008823800260068001861718015110116011800381800000080000160000801008004280042800428004280042
240204800416431011000045010080026166142532014080100160032800008010016000080000440821537583749826363080022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800001100800087238001011258001961260705110216011800380800009980000160000801008004280042800428004280042
24020480041643000000006000180026066102532011480100160062800008010016000080000440821537583639826353080022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800070188002700780014611018605110116023800381800009680000160000801008004280042800428004280042
240204800416431001110022000080026166012532016280100160062800008010016000080000440821137583729826361080022800418004149924478349999320100200800001600002001600001600008004180041118020110099100100800008000001008000082380025101780019612423705110116011800380800009980000160000801008004280042800428004280042
24020480041643000000003700008002616605253201628010016006480000801001600008000044082153758374982636008002280041801824992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080008723800070029800186170605110116011800381801020980000160000801008004280042800428004280042
2402048004164211001000700008002616614253201008010016003280000801001600008000044082033758371982636308002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080008823800000029800196125237051101160118003818000012080000160000801008004280042800428004280042
2402048004164300000000190000800261601125320162801001600628000080100160000800004408206375836198263530800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000601480025002580014611018705110116011800381800009680000160000801008004280042800428004280042
2402048004164210200000230001800261660325320162801001600628000080237160785801334414558375837898254930800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000082380026001280019612523705110116011800380800009980000160000801008004280042800428004280042
24020480041642100000003200008002606613253201648020716006280000801001600008000044082023758373982636108002280041800414992433349999320100200800001600002001600001600008004180041118020110099100100800008000001008000772380026106800196125237351101160118003818000091080000160000801008004280042800428004280042
2402048004164310001100320000800261661325320114801001600628000080100160000800004408209375836298263630801278004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000772380117002580018012623605110116011800381800009980000160000801008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2400258004162000000000310000800261660253200488001016003880000800101600008000044076963758361982550908002280041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800000008001401280013610180502019161918800380800009980000160000800108004280042800428004280042
240024800416200000110019000080026166025320048800101600408000080010160000800004407696375836098255050800228016480041499470350022320010208000016000020160000160000800418004111800211091010800008000001080000014080014317800146110140502020151810800381800009680000160000800108004280042800428004280042
24002480041620011011001500008002616602532004280010160032800008001016000080000440769637583729825509080022800418004149947035002232001020800001600002016000016000080041800411180021109101080000800000108000001408000001480013610140502019151120800381800000680000160000800108004280042800428004280042
240024800416210000000032000080026166825320050800101600408000080010160000800004407696375836098254930800228004180041499470350022320010208000016000020160000160000800418004111800211091010800008000001080000014080014014800146110180503911151912800381801029980000160000800108004280042800428004280042
24002480041623000000002400008002616602532004880010160040800008001016000080000440769637583729825493080022800418004149947035002232001020800001600002016000016000080041800411180021109101080000800000108000001408001401780014619140502011161812800380800009080000160000800108004280042800428004280042
240024800416210000000015000080026166025320048800101600408000080010160000800004407687375836198255110800228004180041499470350022320010208000016000020160000160000800418004111800211091010800008000001080000018080014014800146110140502012151913800380800002680000160000800108004280042800428004280042
24002480041621000000002000008002616642532005080010160038800008001016000080000440769637583729825509080022800418004149947035002232001020800001600002016000016000080041800411180021109101080000800000108000001808001300800146110140502020151322800380800009680000160000800108004280042800428004280042
2400248004162100000000200000800261660253200508001016003880000800101600008000044076963758375982550908002280041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800000008001301680014619180502020152012800381800009980000160000800108004280042800428004280042
24002480041620000000006000080026160425320050800101602168000080010160000800004407696375836198255111800228004180041499470350022320010208000016000020160000160000800418004111800211091010800008000001080000014080013013800006114140502020152218800381800009980000160000800108004280042800428004280042
240024800416200000000032000080026166025320050800101600388000080010160000800004407665375837198255090800228004180041499470350101320010208000016000020160000160000800418004111800211091010800008000001080000018080101114800146191405020172510208003818000011680000160000800108004280042800428004280042