Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (multiple, post-index, 8B)

Test 1: uops

Code:

  ld2 { v0.8b, v1.8b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.004

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.004

retire uop (01)cycle (02)03l1i tlb fill (04)mmu table walk instruction (07)mmu table walk data (08)l2 tlb miss instruction (0a)0e0f18191e1f22233a3f43464951schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f6061696d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map simd uop (7e)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)acafb5bbl1d cache miss ld nonspec (bf)c2c9cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
63005288482244101410020104680283220111663740041000200410001000200010005000500023856121022681285232871131040001000200020002000286772853311610011000100001000021001011001113001317392966985314715820030323338081362602809710001535812657134761000200010002869128771286312873928690
63004287932232102110040004603283420111647840041000200610001000200010005000500023854121022738286622874631040001000200020002000286702854611610011000100001000001001011001110001319594527062319916020056321738041470582809910001549712738136641000200010002865928655287612862028670
63004287952231101000020004670283010111651240041000200410001000200010005000500123842121022755285702871331040001000200020002000285592868411610011000100001000001001011001113001334793016917314427020039314338061862682817110001545112533135641000200010002872928611287602867228697
63004287882232100000000004714283320011661340041000200410001000200010005000500023840121022683285082863131040001000200020002000285402857511610011000100001000021001001001110001348095956974317216219944322338121367632813710001533612422135511000200010002882828750288292871828765
6300428738222110100000000478728343001166604004100020041000100020001000500050002384471022694286622869731040001000200020002000286052857211610011000100001000021001011001202001329796947002316915920012314538192061592819210001519112659138951000200010002869528727288582878228640
63004288162222001000000004770283650101658440041000200010001000200010005000500023834120022661284292877931040001000200020002000286312858211610011000100001000021001011000112001300395476932323006020027314538102165612820210001537012656134501000200010002870228631286132865228819
6300428809222110000007000471228418000166294004100020041000100020001000500050002384271022773284732868831040001000200020002000286552853411610011000100001000001001001000122001336293616952320506020089318238071559612812010001522712485136741000200010002870428689287492880128647
6300428672222100100002000485328350011166414004100020001000100020001000500050002384951022683285302874031040001000200020002000285752848511610011000100001000021001011002112001322894086904320825819915321338092161652812210001519612445137441000200010002866828662286202872128697
6300428699223100000002000478028355101165834004100020041000100020001000500050002385270722761285692877531040001000200020002000286652859911610011000100001000021001041001002001326093976925317006220006322638141160652812810001535012633140601000200010002869228779287032865228741
6300428663222000000002000467128301011165954004100020041000100020001000500050002381060022730285812871331040001000200020002000286532863811610011000100001000021001011000110001334694097013318106219931322338202063672811210001536112575138691000200010002874228731285992868528657

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.8b, v1.8b }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0054

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f233a3f4d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
70205140057112510110000200014004513965981901565011030010100024024330000100001237044533169916115305140036140065140060130733313116180100302001000030000602002000030000140060140041115020110099100401001000010000010010001111000200111000001110032101121111397875000014013100002000050100140062140064140061140043140061
70204140235112612000000200014004613959925901065010030006100004010030000100001237035533177516115305140036140061140060130736313116380100302001000030000602002000030000140060140041115020110099100401001000010000010010001211000100041000211110032101121111397405000001314100002000050100140062140061140042140061140061
7020414006011251000000020001400451396022590106501003000610000401003000010000123701753316991611553914003614006014006313073631311608010030200100003000060200200003000014006014005711502011009910040100100001000001001000211100010001010000111100321011211113973350000131016100002000050100140061140061140175140377140061
7020414006011251011000020001401461396026439068550523301131000040406300001007812581705339086161302571406191409041407131309291121316118189731181103213097562625204843085314080414061371502011009910040100100001000001001000810100090001928610010111100339811671113992450000131113100002000050100140061140156140153140158140158
70204140155112412000001134000140218139683539014550100300101000040241300001004012370445331387161106041400361401451400411307361713121980400302001004030000604462000030121140060140147215020110099100401001000010000010010001111000102141000011010032101106111397275000001013100002000050100140061140148140061140061140061
7020414005711251012000113400014002613960225901035011030006100004010030000100391237044533169916115726140036140060140060130769313116580100302001000030120602002000030000140060140060115020110099100401001000010000010010005211000100011000111112032331110011401155000091486100002000050100140057140058140133140060140051
7020414015011241000000013001140042139702539010350100300061000040100301211000012370085331587161093161400341401541400561307323131159801003020010000300006020020000300001400571401431150201100991004010010000100000100100041110001011410001011100321011211113971350010969100002000050100140057140059140058140057140042
702041400591126110100101400114014113959844901065010030010100014010030000100001237053533363116116432140108140148140148130732161311608010030200100003012260450200003000014014914005521502011009910040100100001000001001000511100020133257100010111103210110111139841500211009100002000050100140138140151140036140036140053
70204140150112501010000145000140038139731799010350100300091000040382301181000012369345335354161217071405431407331408551310529013169582514314321194136199704602008030245140050140035115020110099100401001000010000010010006001000302032681000100100032332801113978950000969100002000050100140052140049140226140229140244
70205140149112501010021145000140213139631829012450110300041000140100301201003912490795333285161134831400131400931400501307841613127280411304431000030000604422000030242140222140050115020110099100401001000010000010010001011000001063881000010104032571951213971750011009100002000050100140146140051140138140052140145

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0054

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f2223243f4d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
70025140053108610000000600001400321396560259001350010300061000040010300001000012459615333517161099080140032140056140056130756313121280010300201000030000600202000030000140056140056115002110910400101000010000110100000110000003100000010031405875413972250000969100002000050010140042140042140057140042140042
70024140053108511010000200001400411396500259001050010300031000040010300001000012459075333285161147790140023140035140035130734313119480010300201000030000600202000030000140050140047115002110910400101000010000010100021110002004100001111031406875713972850000960100002000050010140051140048140048140036140051
70024140047108500000000100001400351396410259001650010300061000040010300001000012459615333517161154150140017140056140056130755313121280010300201000030000600202000030000140056140056115002110910400101000010000110100000110000100100001010031406875513972250000099100002000050010140057140054140042140057140054
70024140053108610000000701001400261396510259001350010300031000040010300001000012459075333285161147790140026140036140035130746313120980010300201000030000600202000030000140050140050115002110910400101000010000110100021110001011100000111031406874413972950000669100002000050010140053140048140048140036140051
700241400501085000000001600001400361396560259001350010300061000040010300001000012459615333517161099080140032140067140056130755313121580010300201000030000600202000030000140056140053115002110910400101000010000110100000110000000100001010031405875513972250000990100002000050010140054140057140057140057140057
700241400561086110000008275280001400411396564259001350010300032077240010300001000012458565332706161148961140026140057140041130756313121380010300201000030000600202000030000140051140051115002110910400101000010000110100000110000006100001010031405874513972650000131018100002000050010140055140058140058140055140055
70024140057112500000000160001140039139635025900135001030003100004001030000100001245943533270616113346014001414005414005413077831324048001030143100003000060260200003000014039614005111500211091040010100001000001010000011000020310000101004890874315151453735074010031100002000050010140058140058140037140055140036
700241400541125000000001200001400391396540259001350010300031000040010300001000012459435332706161133460140027140054140054130734313121380010300201000030000600202000030000140054140051115002110910400101000010000010100000010000000100000010031404875413972650000008100002000050010140055140052140055140055140059
700241400541049000010002200001400391396540259001350010300031000040010300001000012459435333441161151810140030140054140054130753313121380010300201000030000600202000030000140054140051115002110910400101000010000010100000110000000100001010031405875513972650000131318100002000050010140056140057140055140052140055
70024140054104900000000100001400391396510259001350010300031000040010300001000012458475333441161151810140030140054140054130756313121380010300201000030000600202000030000140054140052115002110910400101000010000110100000110001000100001010031404875413972850000131318100002000050010140055140055140055140036140052

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.8b, v1.8b }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0075

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f18191e1f22243a3f4d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)int prf full (71)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
7020514005810860000010140000140065139621259010650100300061000040100300001000012399685332449161168581140113014006514007813075103131168801003020010000300006020020000300001400751400751150201100991004010010000100000100100018110001010110001101000321061213613974550000131010100002000050100140076140076140076140076140066
702041400551085000000020001140064139618539010350100300061000040100300001000012372145332455161174261140031014006514006513075103131158804053020010000300006020020000300001400751400651150201100991004010010000100000100100030110000000010000111100323351216613974650000131313100002000050100140080140080140080140080140080
702041400791086100010010000140040139617259010350100300001000040242300001000012371955332342161161231140055014005714007913074903131183801003020010000300006020020000300001400791400811150201100991004010010000100000100100021110002011410000101000323461214613974550029131313100002000050100140056140076140076140349140163
702041401601087000102221341760011401461398008290137501523001010003403843012010118123860653350341611441311401850140156140248130760042131279807083337911132333906665622104303671407571401594150201100991004010010000100000100100070110004000963510003110130321061213613975250000131013100002000050100140080140080140080140081140081
702041400791086100110010000140060139621259010050100300031000040100300001000012371795332262161161231140052014008014007613075203131190801003020010000300006020020000300001400791400761150201100991004010010000100000100100021110001010110000101000321061213613974550000131013100002000050100140076140076140066140076140066
7020414007510860001000140000140064139621259010650100300061000040100300001000012371875332297161174261140051014007514007513075203131178801003020010000301426020020000300001400751400751150201100991004010010000100000100100000110000000010000110100321061213613974950000101313100002000050100140080140080140077140080140080
70204140079108610011001000014006013961742901035010030003100004010030000100001237083533226216116123114005501400791400791307550313117980100302001000030000602002000030000140079140076115020110099100401001000010000010010001111000201011000010000032106121361397455000013013100002000050100140076140079140076140076140076
7020414007510860001000140000140064139622259010650100300061000040100300001000012372145332411161174260140041014007514005513074103131178801003020010000300006020020000300001400751400651150201100991004010010000100000100100000110000010010000111100321061216613975250000131313100002000050100140108140077140080140077140080
702041400791085100000010000140060139616259010350100300031000040100300001000012371795332262161161231140052014007914007913075503131179801003020010000300006020020000300001400771400761150201100991004010010000100000100100011110002000110000100000321061213613974550000131013100002000050100140066140076140076140056140076
70204140075108500211002000014006413961925901065010030006100004010030000100001237214533248916117426014004101400751400761307510313116880100302001000030000602002000030000140065140078115020110099100401001000010000110010000011000000031000001110032106121661397295000013100100002000050100140080140080140058140077140080

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0054

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f2223243f4d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5l1d cache miss ld nonspec (bf)c2cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
7002514004910490000000370100140039139652259001350010300031000040010300001000012458975333249161150040140015014005114005213074831312168001030020100003000060020200003000014003614004911500211091040010100001000001010000011000000001000001031405872213971150000969100002000050010140050140053140053140056140056
700241400521086000000010000140123139652259001350010300031000040010300001000012459255333249161150040140029014014714005213074831312148001030020100003013960020200003000014003614003611500211091040010100001000001010000011000000031000011031402982213972450000660100002000050010140053140050140053140050140053
7002414003610860000100088000140037139649259001350010300031000040010300001000012458975333361161150040140025014004914005213075131312088001030020100003000060020200003000014005214004911500211091040010100001000001010000001000000001000011031642873313972150000909100002000050010140050140053140053140053140053
7002414004910850001000130001140042139636259001350010300031000040151300001000012459255333400161150040140028014005314005213075131312118001030020100003000060020200003000014014514004911500211091040010100001000001010000011000000032451000010031402872213972450000969100002000050010140102140142140037140050140053
700241400541085000000010000140130139673259001350010300001000040010300001000012458655332745161134600140028014005214003613075131311958030830020100003000060020200003000014005214004911500211091040010100001000001010000011000504031000001031632873313972450000999100002000050010140053140050140050140050140050
7002414003610850000001100001400211396522590013500103000310000400103000010000124592553327451611500401400280140052140052130778313121180010300201000030123600202000030000140052140052115002110910400101000010000010100000110000000010000110314028733139724500009106100002000050010140050140053140053140053140050
70024140052108600000002588000140128139652259001350010300031000040010300001000012459345333361161150040140029014009714005013075131312118001030020100003000060020200003000014005214003611500211091040010100001000001010000001000001001000011031402872213981050000966100002000050010140050140053140050140050140140
700241400361086000000060000140037139652259001350010300031000240010300001000012458975332745161150040140025014004914003613074831312088001030020100003000060020200803000014003614004911500211091040010100001000001010000011000000031000010031402873313972450000969100002000050010140053140053140053140053140050
700241400361086000000013880001400381396492590010500103000310000400103000010039124595253334401611500401400310140052140054130735161311958001030020100003000060020200003000014005214005021500211091040010100001000001010001011000000031000111031402873313972450000900100002000050010140053140053140137140037140050
7002414004910860000100100011400371396522590013500103000310000400103000010000124585653335911611500401400280140052140036130751313119580010300201000030000600202000030000140036140036115002110910400101000010000010100000110008010960510001112318621032413995450020009100002000050010140211140256140331140242140353

Test 4: throughput

Count: 8

Code:

  ld2 { v0.8b, v1.8b }, [x6], x8
  ld2 { v0.8b, v1.8b }, [x6], x8
  ld2 { v0.8b, v1.8b }, [x6], x8
  ld2 { v0.8b, v1.8b }, [x6], x8
  ld2 { v0.8b, v1.8b }, [x6], x8
  ld2 { v0.8b, v1.8b }, [x6], x8
  ld2 { v0.8b, v1.8b }, [x6], x8
  ld2 { v0.8b, v1.8b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f223a3f4346494e4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696d6emap stall dispatch (70)simd prf full (72)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
240205800416201100110032000800261661625320164801001600628000080100160000800004408211375837498263618002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000100800086238002600029800186162370511011611800381800009980000160000801008004280042800428004280042
2402048004162011010000440008002616612253201648010016006280000801001600008000044082113758371982550780022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800001008000762480027000258001861262360511011611800380800009980000160000801008004280042800428004280042
24020480041621100000003100080026166122532016480100160062800008010016000080000440821137583739826361800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000010080007723800250012580019612423715110116118003818000010980000160000801008004280042800428004280042
2402048004162010110000310008002616610253201148010016006280000801001600008000044082003758373982550780022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800001008000682380026000298001960262371511011611800381800009080000160000801008004280042800428004280042
2402048004162011001000310008002610612253201648010016001480000801001600008000044082013758373982636380022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800001008000672380025001258001961252370511011611800380800009980000160000801008004280042800428004280042
240204800416211110100031000800261061125320162801001600648000080100160000800004408215375837498263778002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000100800087238002500025800196172361511011611800380800009980000160000801008004280042800428004280042
24020480041621101000006000800260661625320114801001600128000080100160000800004408211375837798263638002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000100800076268002701098001960262361511011611800380800009980000160000801008004280042800428004280042
24020480041621101100007000800261061425320164801001600628000080242160000800004408211375836198263718002280041800414992403499993201002008013116000020016000016000080041800411180201100991001008000080000100800068248002900068000001252471511011611800380800009080000160000801008004280042800428004280042
24020480041621111100003100080026066162532016480100160014800008010016000080000440821137583739826361800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000010080007723800250007800196172370511011611800380800000980000160000801008004280042800428004280042
240204800416201101000031001800261061925320114801001600148000080100160000800004408206375837398269258002280041800414992422349999320100200800001600002001602661600008004180041118020110099100100800008000010080008708000601028800006172370511011611800381800009080000160000801008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f23243a3f4346494c4e4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)int prf full (71)simd prf full (72)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)a5ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
240025800416210000001002000008002610000425320050800101600328000080010160000800004407696375836198255191800220800418004149947003500223205302080000160000201600001600008004180041118002110910108000080000010800000140800130001380011611018000502081698800380800009680000160000800108004280042801818004280042
24002480041620000001000190000800261660002532004280010160000800008001016000080000440769637619059826742180022080041800414994702135002232001020800001600002016000016000080041800411180021109101080000800000108000001808001400014800130110140005020715868003808000012680000160000800108004280042800428004280042
24002480041644000000000190000800261660046732005080010160040800008001016000080000440768937583769825493180128080041800414994700350104320010208000016000020160000160260800418004111800211091010800008000001080000016080010000168001300100000502081578800381800949680000160000800108004280042800428004280042
240024800416430000000101900008002616600025320048800101600408000080010160000800004407696375837298256971803460800418004149947003500223200102080000160261201607821607978035580347318002110910108000080000010802700008028620015028000060100000502073579803661801889980000160000800108049180478803728071780489
2400248048264600110010028000080026166000253200508001016004080000800101600008000044076903758361982512518002208004180041499470035002232001020800001600002016026616000080041800411180021109101080000800000108000001408000000014800146112180005047815788003818000010680000160000800108004280042800428004280042
24002480041643000000000132000080026166000253200428001016004080000800101600008000044076963758376982569518002208004180190499470035002232001020800001600002016000016000080041800411180021109101080000800000108000001408001300008001361014000502081586800381800009680000160000800108004280042800428004280042
24002480181643000000010000008002616600025320050800101600408000080010160000800004407696375837498255091800220800418004149947003500223200102080000160000201600001600008004180041118002110910108000080000010800870140800140101480014011022000502081578800381800009980000160000800108004280042800428004280042
240024800416430000011003200008002616600225320074801051600628000080010160000800004407696375837498272141800220800418004149947003500223200102080146160000201602561600008004180041118002110910108000080000010800000140800140101480009011018000502051587800380800006680000160000800108004280042800428004280042
24002480041643000000000310010800261600002532004880010160000800008001016000080000440985737583619825511180452080041800414994700350022320010208000016027020160000160000800418004111800211091010800008000001080000000800140001480013611014090502051587800381800939780000160000800108004280042800428004280042
240024801826430000001002000008002610600102532004080010160000800008001016000080000440768937583769825139180022080041800414994700350022320010208000016000020160000160000800418004111800211091010800008000001080000000800090001480014601018000504081588800381800009080000160000800108004280042800428004280182