Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (post-index, 1D)

Test 1: uops

Code:

  ld2r { v0.1d, v1.1d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.002

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.002

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
630052880623132012401200388100478528408000167024002100020021001100020001000500050022385860022674288062874971040001000200020002002286712869921610011000100011000221002000150100020020130329318684230581666204373239381915586722859610001604712895143791000200010002931329357292682940329278
6300429281237270125100000000046192879101017125400610002000100010002002100050005003238665002268229089291633104000100020002000200029117291811161001100010000100003100000015910001030213130479425680630501366203943311381423626022846110001589412933143811000200010002942228920289452900229130
63004291922343300240000130100467828650010170144006100020061000100020001000500050242384650022708287862902331040041000200020002000289142895411610011000100001000031001000151100101300131619344690630661262203263259381217646022834210001567212939139241000200010002888428947289892905329012
6300429013232281026000100000045972865200017070400610002006100010002000100050005024238733002266328899291088104000100020002000200029058289571161001100010000100000100000024100021300130709348690131421161203183226380821676822830910001580012754140751000200010002900029004291112907528954
6300429247234230025000001320000464428497011170124000100020061000100120001000500050262380060022726288482906231040001000200020002000288502894111610011000100001000231001000150100120000130579461686731101366204133237381517626022837110001594112918140901000200010002909829023290652910629059
630042902523431012900000088000473228495001169724006100120061000100020001000500050282389050022695288932919131040001000200020002000289172891511610011000100001000031001000145100123000131059262687031401155204493265381514566122830810001592212945141641000200010002910629206291552906029086
63004291032332500291000040100466328659010171044006100020081000100020001000500050082387260022688288862905331040001000200020002000288002882411610011000100011000001001000147100020000132989341686631161460204203190381421556322838610001571813083142771000200010002920029060292082913829017
6300428941234281028000004000045742858700016924401010002006100010022000100050005005238587002274428959291093104000100020002000200028900289412161001100010000100203100000017210000120013171922368553101967206503320381514666922853210001600712831142701000200010002929329339293812924629070
63004291492342500220010040000469228441001169614000100020061000100020001000500050232385050022714288472912331040001000200020002000288022869911610011000100001000031001000153100020100132639390691330861961201363301381013595822827210001571612673139281000200010002882428898288852888428811
63004290372352500220000020000473828369001169094002100120001000100020021000500550202386250022712286552898931040001000200020002000293762936011610011000100001000031000000157100020000131338995683131141167207423295381224606322864810001595712946140751000200010002949429585295212930829303

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.1d, v1.1d }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514005310490001001301001400201395772590103501003000010000401003000010000124478653300421611410901400120140050140050130726031311948010030200100003000060200200003000014005014003511502011009910040100100001000011001000000100000031000011000321011211113970850000900100002000050100140036140052140051140051140051
70204140036108500100010000140035139552259010350100300001000040100300001000012447865366527161393300140082014005014005013072603131194804223020010000300006020020000300001400501400471150201100991004010010000100000100100000110000000100001100032351801113971150000909100002000050100140051140036140036140048140051
7020414005010850011101200001400351395772590100501273000310000401003000010000124778453312031611434001400260140050140050130749031311508010030200100003000060442200003000014014114004711502011009910040100100001000001001000001100000032311000311000321011021113972250000900100002000050100143487140036140049140036140036
702041400471086001100100001400201396402590133501003000010000401003000010000123692553299391611270101400110140050140053130726031311508041330200100003000060200200003000014003714005211502011009910040100100001000011001000001100001031000001000321011211113972250000969100002000050100140051140053140036140036140051
70204140050108610111010000140038139592259010050100300031000040100300001000012447865330042161141090140026014005114003513072303131194801003020010000300006020020000300001400501400351150201100991004010010000100001100100000010000103100001100032371801113972250000666100002000050100140051140051140051140051140051
70204140050108500001000000140033139589259013350100300031000040100300001000012369035330746161141091140023014003514014313072603131194801003032010000300006020020000300001400501400471150201100991004010010000100001100100000110000000100001000032101801113977850000969100002000050100140051140051140051140051140151
702041400541085001100130000140035139589259010350100300031000040244300001000012369035330746161141090140011014004714003513072403131304801003020010000300006020020000300001401401400351150201100991004010010000100001100100000010000000100010100032101802113972250000966100002000050100140048140051140052140051140048
70204140050108600000013880101401311395772590103501003000310000401003000010000124802253310561611285311400240140047140047130711031131153801003020010000300006020020000300001401451400472150201100991004010010000100000100100030110000103100001100032101801113979950000990100002000050100140128140052140051140048140036
7020414005010860010001300101400331395772590103501003000310000401003000010000124626953304731611273911400260140050140035130726017131194801003020010000300006020020000300001401421400472150201100991004010010000100001100100010110000023100011100032101801113970550000969100002000050100140051140147140048140048140036
7020414003510861000000001014003513955225901035010030003100004010030000100001248731533182816114109014002601400501400511307250313119480100303201000030000604422000030121140050140134215020110099100401001000010000110010005001000500638010002010003257112531139855500331169100002000050100140234140228140232140376142835

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0047

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
700251400471086000000000001001140038139653259001350010300061000040161300001000012458795333173161148991400231400471400471307463131215800103002010040301246002020000300001401501400501150021109010400101000010000010100000110000300100001000031401887211913971950000666100002000050010140051140048140048140051140052
7002414003510860000000000013000140032139647259001350010300031000040010300001000012458795333173161147791400111400471400351307343131206803433016710000300006002020000300001400981400551150022109010400101000010000010100000110000000100001010031832187202213971950000600100002000050010140048140105140117140049140058
7002414005310850101001000014000140032139647259001350010300001000040010300001000012458795333173161134631400261400501400471307463131210800103002010000300006002020000300001400551400471150021109010400101000010000010100000110000003100001010031401387212013971950000666100002000050010140048140048140051140048140048
700241400351085000000000006000140039139647259001350010300031000040010300001000012458795333173161147791400231402531400511307463131206800103002010000300006002020000300001400781400521150021109010400101000010000010100000110000000100001010031402487221713972450000666100002000050010140048140048140048140048140051
700241400471085000000001001000140033139647259001350010300031000040010300001000012458795333285161147791400231400471400841307463131209800103002010000300006002020000300001404241400482150021109010400101000010000010100000010000009100001010031402287232113971950000666100002000050010140048140048140048140048140039
7002414003510850000000110010001400321396522590013500103000310000400103000010000124590753332531611334614002314004714005013074931312068001030020100003000060020200003000014012014005311500211090104001010000100000101000001100000001000011111314024872423139725500006012100002000050010140054140248140236140279140343
7002414024110871102210003239817601142156139949110900465003230018100024057630241101181250705533809916129473140178140340140326130828281313648090630384101223024060742201603036514034014025041500211090104001010000100000101000341100030113187100001111031402287152213971350000660100002000050010140054140054140056140054140054
7002414005310860100000000014000140035139650259001350010300031000040010300001000012458795332706161147791400231400471400471307463131206800103002010000300006002020000300001400641400471150021109410400101000010000010100000110000100100001010231402587172313971950000969100002000050010140036140036140036140048140048
70024140049108500000000000100014003213966425900135001030003100004001030000100001245879533328516114779140023140035140047130734313120680010300201000030000600202000030000140124140060115002110901040010100001000001010000011000000010000101003188211092322139719500006106100002000050010140052140048140048140048140048
700241400471085000000000001000140032139650259001350010300001000040010300001000012459075333173161133461400231400471400481307573131206800103002010000300006002020000300001401041404221150021109010400101000010000010100000110000000100001010031401487211513971950000666100002000050010140048140048140048140048140051

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.1d, v1.1d }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0065

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514005310860010110010000140050139565259010350100300031000040100300001000012449025331917161162360014003914006514006313073931311788010030200100003000060200200003000014006314005511502011009910040100100001000001001000000100001031000010100032101801113974050000069100002000050100140064140064140064140067140067
70204140063108600001100120000140051139565259010350100300031000040100300001000012370965330530161164770014003914006314006313073131311698010030200100003000060200200003000014006314006611502011009910040100100001000001001000001100000031000010100032101171113973650000999100002000050100140064140064140064140067140064
70204140065108600001100100001400401395652590103501003000010000401003000010000123709653319171611498000140039140063140055130739313115880100302001000030000602002000030000140068140066115020110099100401001000010000010010000001000000010038101000321011211113973550000969100002000050100140064140166140069140065140067
70204140068108700001000100001400481396072590100501003000310000401003000010000124804053314991611670401140032140155140066130731313116880100304671000030000602002000030120140063140055115020110099100401001000010000010010000011000312010000100000321011211113980350000669100002000050100140068140064140056140066140064
70204140065112500000001250000140048139565259010350110300031000040100300001000012441895332312161162360014003914006514006513073931312078010030200100403000060200200003012314006314006611502011009910040100100001000001001000001100001001000010100032101801113973550000699100002000050100140056140056140067140064140067
702041400631125000010001800001400481396082590103501003000710000401003000010000124222853305301610793200140039140063140066130742313120780100302001000030000604482000030121140063140061115020110099100401001000010000010010000011000000010000101000321064611113973650000666100002000050100140144140069140067140064140056
7020414006311250000110010000140051139598259010350100300071000040100300001004512467835330530161206750014003914006614005513073931312408019630200100003000060200200003014614025614006611502011009910040100100001000001001000001100001001000010100032101801113977950000969100002000050100140073140156140067140064140067
70204140444112500001100600001400481396072590103501003000310000401003000010000124491153305301611623600140039140066140066130739313120980400302001000030000602002000030000140063140063115020110099100401001000010000010010000011000000010000001000321011211113973550024999100002000050100140064140064140158140064140069
702041400631125010011001300001400481395652590103501003000310000401003000010000124490253305301611623600140042140066140055130742313120780100302001000030000602002000030000140056140055115020110099100401001000010000010010000011000020010000101000323311211113973550000969100002000050100140064140064140064140064140056
7020414006311250000110000000140131139565819012250125300071000244372322351011812416785337211161173700014030014024514043913078412513267986099305771008130359609342008230485140348140143415020110099100401001000010000010010003011000600010002101000325711132113995150010999100002000050100140241140326140344140253140343

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0052

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7002514005010860000010001400371396522590013500103000310000400103000010000124592553333611611500401400281400521400521307513131211800103002010000300006002020000300001400501400531150021109010400101000010000010100000110000200100001100314006874513972450000009100002000050010140053140112140054140086140037
7002414003910850000010001400371397622590013500103000310000400103000010000124592553332491611500401400281400521400521307513131211800103002010000300006002020000300001400521400491150021109010400101000010000010100000110000000100001000314007877613972650000999100002000050010140050140113140068140056140053
7002414004910860100010001400341396862590013500103000310000400103011910000124773253333611611540101400311400521400521307353131215800103002010000300006002020000300001401531400491150021109010400101000010000010100000110000000100001100314005954413972450000009100002000050010140054140123140108140054140150
700241400531125010021300014003713971625900135001030003100014001030000100001245925533336116115004014002814005814005513075131312118001030020100003000060244200003000014005214004911500211090104001010000100000101000001100001001000011003140012874613972450000969100002000050010140060140054140096140060140125
70024140052108500000220001400391396882590025500103000010000400103000010000124592553333611611512001400281400521401541307531613121381801301431000030000600202000030000140036140049115002110901040010100001000001010000011000120910000012131730511588139708500009610100002000050010140053140050140126140066140053
700241400521086000101301014003713968325900105002030007100004001030000100001245856533274516113820014002814003614015113077535131268800103002010041300006002020000300001400381400371150021109010400101000010000010100020110000003100021100314006874713972150000969100002000050010140053140052140133140059140138
70024140036108600010000014003713972454900135001030000100004001030000100001245934533336116119150014002814005214005213075317131211800103002010000300006002020000300001400521400491150021109010400101000010000010100000110000000100001100314004876513972150010909100002000050010140053140050140213140143140053
7002414003610860000113388001400371396722590013500103000710000400103000010000124598853353581611511801401101400521400521307523131266800103014210000300006002020000300001400551400492150021109010400101000010000010100010110000000100000000314005877513970850000069100002000050010140152140050140110140061140058
700241401491086000001880014003713970253900135001030000100004001030118100001245925533340016115121014002814005214005213075131311958030830020100003000060020200003000014005414003611500211090104001010000100000101000201100020032551000211003211051255513992950039969100002000050010140233140396140346140342140337
700241402211088100279397880014022413973913790043500393001510003402943061010157125380853382981611540111400281400521400521307883131211803523002010000300006002020096300001400521400491150021109010400101000010000010100000110000003100001100314004877613972450000969100002000050010140053140061140117140074140053

Test 4: throughput

Count: 8

Code:

  ld2r { v0.1d, v1.1d }, [x6], x8
  ld2r { v0.1d, v1.1d }, [x6], x8
  ld2r { v0.1d, v1.1d }, [x6], x8
  ld2r { v0.1d, v1.1d }, [x6], x8
  ld2r { v0.1d, v1.1d }, [x6], x8
  ld2r { v0.1d, v1.1d }, [x6], x8
  ld2r { v0.1d, v1.1d }, [x6], x8
  ld2r { v0.1d, v1.1d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2402058004164311001000480001800261660025320170801001600688000080100160000800004408136375836698265590080022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800077278003002308002361280715110011611180038180000141380000160000801008004280042800428004280042
24020480041643100000003400008002616641025320172801001600688000080100160000800004408215375836598265590080022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800078278003001298002301292770511001161480038180000131480000160000801008004280042800428004280042
2402048004164310001100360001800261664102532017280100160072800008010016000080000440813637583659826573008002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080008627800071078002301282770511001161158003818000013080000160000801008004280042800428004280042
24020480041643100100003900018002616083253201728010016007280000801001600008000044081363758336982655900800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000011008000780800070068002361292770511001161580038080000131780000160000801008004280042800428004280042
240204800416431001000034000180026166462532017280100160012800008010016000080000440813037583649826558008002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080007827800290228800236130070511001161168003818000013080000160000801008004280042800428004280042
24020480041642100000003601018002616600253201728010016006880000801001600008000044082023758365982655900800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000982780031112880023613027705110011611380038180000131380000160000801008004280042800428004280042
240204800416431010010070001800261604325320172801001600148000080100160000800004408136375836598269640080022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800088278002801318002361302771511001161380038080000131380000160000801008004280042800428004280042
24020480041643100001004800018002616603253201728010016006880000801001600008000044081363758360982655900800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000882780031017801126131277251100116168003818000001380000160000801008004280042800428004280042
240204800416431000000060001800261064125320174801001600148000080100160000800004408136375836598254950080022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800958278002800338002261302762511001161480038180000131380000160000801008004280042800428004280042
2402048004164311110000340001800260660625320172801001600688000080100160000800004408136375835998269310080022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800067278003100308002361302871511001161380038180000131380000160000801008004280166800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
240025801836431010110012300000180026106432532007880010160012800008001016000080000440761737583639826287008002280041800414994703500213200102080000160000201600001600008004180041118002110910108000080000010800077080029000288002361302760050200121611980038180000131480000160000800108004280042800428004280042
24002480041643120000001104000018002616646253200828001016007280000800101600008000044076173758365982628710800228004180041499470350021320010208000016000020160000160000801818004111800211091010800008000001080007102780006000288002361282870050200816121180145180000131380000160000800108004280042800428004280042
2400248004164311010000350000080026160412253200808001016007280000800101600008000044076173758369982741200800228004180041499470350022320010208000016000020160000160000800418004111800211091010800008000001080007727800280113380023613027600502001216111180038080000131380000160000800108004280042801838004280042
24002480041642100000003600000800261604725320082800101602508000080010160000800004407617375836398263070080022800418004149947201165002132001020800001600002016000016000080041800411180021109101080000800000108000672780031000308002361302770050200815111180038080000131380000160000800108018280042800428004280042
2400248004164410000010150900000800261604325320436801041600148000080010160536801314407617375836698450660080022801818018350002658950103321075208026116054220160260160271801818018231800211091010800008000001080097608009720012308019361300600505701534121180143080185131380000160000800108032380332801898004280323
24002480323644120000121668800028030716644411332084380103160252802828014316052580134441214837619579844337008013680181800415000239295010432109620801361602692016054416054480182801823180021109101080000800000108010372780030001288010961282771050200925121180038180000131380000160000800108004280042800428004280042
2400248004164310000000400000080026166432532007880010160068800008001016000080000440761737583669826287008002280041800414994703500213200102080000160000201600001600008004180041118002110910108000080000010800079278003001133800226134277005020012161198003808000001380000160000800108004280042800428004280042
240024800416431100000050000008002616643253200828001016007280000800101600008000044076863758365982628700800228004180041499470350021320010208000016000020160000160000800418004111800211091010800008000001080007727800300013180022012827700502001116111180038180000131380000160000800108004280042800428004280042
240024800416421000010036001008002616643253200788001016006880000800101600008000044075913758366982628700800228004180041499470115500213200102080000160000201600001600008004180749118002110910108000080000010800078318003202178002261302761050200121691280038080000131380000160000800108004280042800428004280042
240024800416431000100082500010800261661525320072800101600648000080010160013800004407648375837598251430080022800418004149947035002232001020800001600002016000016000080041800411180021109101080000800000108000772380008011328001861260700502001315913800380800009980000160000800108004280042800428004280042