Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (single, post-index, S)

Test 1: uops

Code:

  ld2 { v0.s, v1.s }[1], [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.006

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
630052910323411512410010000474028648001700540061000200610001000200410025005505324028622726288442908931040001000200020004000288962878711610011000100011001221001001110002121001303891666824310785320310331838161954492848110001610713059143261000200010002911529134290232919528942
6300429133234115017022133176100472728748001720140101001200810001000200210005005504823992322688289562919571040041000200020024004289862892531610011000100001005421006106381100121216013122930269223097115020580330838102657522836510011582612762138461000200010002922329165292162911929077
6300429041234119121211144881004718288690016998400810012008100010012000100150055005239661227512884129146328400010002000200040082894429135316100110001000010044210040067861000212133251313192276875306176120700329738132153602874710001586413124143921000200010002913629170291532910229116
630042888323202012000030000463528429001657840061000200610001000200010005000500023869622744286412885731040001000200020004000287042873411610011000100001003021002001110002121201315193976931309155520425327438131760562822810001561412387140861000200010002880228786289172882628751
630042872523012111610030100477128445001678840061000200610001000200010005000500023878122719286862870231040001000200020004000287892867111610011000100001002221002000110002121001325894226971309585620211332538191448492829310001553912795138281000200010002880928933288142879128883
6300428912232122116000300004703285480016813401010002006100010002000100050005000238621226792871428925310400010002000200040002873028677116100110001000010022210020011100021212013264939469303186104720191321238132354522848410001571412602140891000200010002893529033289642895729131
63004288432311141211001288000467528554001686040061000200610001001200010005000500023870522704290192954231040001000200020004000285512860511610011000100001004221003002110002131001331596526944323165420015326038151151522833810001542912474136781000200010002866528659286562867528590
630042876522311311400030000485628328001662040061000200610001000200010005000500023896522768286082865731040001000200020004000285462861611610011000100001002221002002110002121101316197066932320075619987322938201849532814510001557412542136011000200010002867628809287602873728615
630042870622212001710010100473428369001663640061000200610001000200010005000500023868422726285942877831040001000200020004000286162870811610011000100001002221003001410002121001328095916942318785219958326238151748522818410001537712545136631000200010002861928659287312870328621
63004287982221151161001350100474328325001645940061000200610001000200010005000500023876522763285522877631040001000200020004000286372864311610011000100001002121001001110002121001316095557014319695120141317138181855502818010001527012339137501000200010002864828699286312876328560

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.s, v1.s }[1], [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514005710851100101001010001400451396492590100501003000310000401003000010000123702653325051611572614003701400571400571307363131160801003020010000300006020020000500001400571400571150201100991004010010000100000100100012110000000100001111132441801113973050000131014100002000050100140042140061140110140078140055
70204140054108600000000010000014003613966125901035010030003100004010030000100001236990533147116114560140011014003514005413071131311548010030200100003000060200200005000014005614005211502011009910040100100001000001001000001100000001000010100321011211113972150000101010100002000050100140036140038140149140058140036
7020414003510850000000001010001400361396612590103501003000310000401003000010000123696353313571611456014002701400511400511307273131157801003020010000300006020020000500001400541400351150201100991004010010000100000100100000110000300100001010032101121111397265000001013100002000050100140036140036140114140105140060
702041400511085000001000100000140039139645259010350100300001000040100300001000012369035331512161145601400270140051140051130730313116080100302001000030000602002000050000140054140051115020110099100401001000010000110010000001000000010000100003210112111139724500000010100002000050100140052140036140108140083140052
702041400511086000001100100000140039139599259010350100300031000040100300001000012369905331471161145601400270140035140054130711313115480100302001000030000602002000050000140056140051115020110099100401001000010000010010000011000010310000001003210112111139721500001300100002000050100140036140052140128140040140036
702041400511086000000000130010014003914102325901035010030000100004010030000100001236963533135716114560140030014005414005113071131311388010030200100003000060200200005000014005614003511502011009910040100100001000001001000001100000031000010100321011211113970550000101313100002000050100140055140143140058140124140055
702041400511085000000100400100140020139745259010350100300031000040100300001000012369905331357161147881400300140054140150130727313114080100303221000030000602002000050000140054140035115020110099100401001000010000010010000011000410010000000003210112111139725500000100100002000050100140036140052140129140067140036
70204140054108500000001010010014003913961525901035010030003100004010030000100001236990533147116114560140011014005414005413073031311388010030200100003000060200200005000014005114003511502011009910040100100001000001001000001100000001000010100321011212113972450000101010100002000050100140054140156140321140060140055
702041400541085001001000140010014004213967625901035010030003100004010030000100001236903533147116114560140027014005114005113071131311578010030200100003000060200200005000014005114005211502011009910040100100001000011001000001100002001000410100386711181113995350031101013100002000050100140150140124140235140216140239
702041402161086010000143396264010014040713967711090135501343001010003403843034910078125465753346371611562714010801403241404211307913013134780399302001000030000602002000050000140054140051115020110099100401001000010000010010000011000000010000001003210112111139724500000100100002000050100140059140065140128140066140052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7002514005110860100000013000140039139654259001350010300031000040010300001000012459435333481161153720014003014005414005713075331312138001030020100003000060020200005000014005414005111500211091040010100001000001010000011000009503100001100314000138715141397265000013013100002000050010140055140055140055140055140115
700241400541086000000001300014003913965425900135001030003100004001030000100001245916533340516115181001400271400541400561307533131213800103002010000300006002020000500001400541400511150021109104001010000100000101000000100000139001000011003140001387101513972350000131313100002000050010140052140055140055140055140083
7002414005510860000000010001400391396542590013500103000310000400103000010000124594353327061611518100140030140036140054130753313121380010300201000030000600202000050000140054140051115002110910400101000010000010100080110007013163181000511003140001087141013972650000131313100002000050010140055140058140056140052140130
700241400511086000000001000140039139654259001350010300031000040010300001000012459435333441161151810014001114005414003513075031312138001030020100003000060020200005000014005414005411500211091040010100001000001010000011000009100100001100314000138714131397265000013013100002000050010140055140056140056140055140093
700241400541086000000001000140036139654259001050010300061000040010300001000012459435333558161151810014003014005114005613075331312138001030020100003000060020200005000014005914005211500211091040010100001000001010000011000009803100001100314054148714131397265000013013100002000050010140057140052140055140055140101
70024140075108500000100100014003813965125900135001030007100004001030000100001245943533344116115181151400301400541400511307503131213800103002010000300006002020080500001400351400551150021109104001010000100000101000001100000142031000011003140009101131013970750000131013100002000050010140058140055140036140055140110
700241400511086000000001450001400391396542590013500103000310000401513000010000124594353335931611518110140030140056140035130753313119480010300201000030000600202000050000140054140051115002110910400101000010000010100000110000093001000011003140001387101113970750000131313100002000050010140059140055140055140055140042
700241400541085000000001000140039139654259001350010300031000040152300001000012459435333441161133461514003014005414005113075331312138001030020100003000060020200805000014005514005111500211091040010100001000001010000011000009303100001000314000138710131397275000013010100002000050010140055140055140056140056140091
700241400541086000000101330001401361405961099002850043300151000440294303541016612505185338165161256850014017114024414014213080243131250803103026410122302436050620324502031404091403283150021109104001010000100000101000521100030612652010003112032105317121141313972650000131313100002000050010140036140036140052140036140112
7002414005410860000000000001400391396542590013500103000310000400103000010000124594353327061611518100140011140054140055130753313121680010300201000030000600202000050000140054140054115002110910400101000010000010100000110000091001000011003140531287141113972750000131013100002000050010140036140055140055140055140149

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.s, v1.s }[1], [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0074

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 43 | 49 | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | e2 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140063108501000001000014005910139616259010650100300061000040100300001000012371695331504161150331400501400741400741307503131177801003020010000300006020020000500001400741400741150201100990100401001000010000010010000021000104041000111100032101121011139796050000666100002000050100140075140075140075140056140450
7020414007410850000000775000014005911139616259010350100300061000040100300001000012371965331504161171921400501400741400741307503131177801003020010000300006020020000500001400741400751150201100990100401001000010000010010000021000103011000101100032101121011139744050000666100002000050100140075140076140077140075140080
70204140074108500000001000014210511139616259010650100300061000040100300001000012371695332221161171921400501400741400741307503131177801003020010000300006020020000500001400741400741150201100990100401001000010000010010000021000001011000111100032101121011139744050000066100002000050100140056140056140077140075140078
70204140074108500000002000014004011139616259010650100300031000040100300001000012371695332221161171921400501400741400741307503131177801003020010000300006020020000500001400741400741150201100990100401001000010000010010000021000106011000111000032101121011139744050000667100002000050100140075140075140075140075140083
70204140055108600001002000014006011139597259010650100300061000040100300001000012371195331504161175431400501400771400741307313131177801003020010000300006020020000500001400741400741150201100990100401001000010000010010000001000102011000111100032101121011139744050000666100002000050100140075140075140075140075140077
702041400741086000010020000140040111396162590106501003000610000401003000010000123716953315041611503314003114005814007413075017131177801003020010000300006020020000500001400741400552150201100990100401001000010000010010000021000004011000111100032101121011139725050010666100002000050100140075140075140170140075140198
702041400551086000000014000014004010139616259010650100300061000040100300001000012371785332221161171921400501400741400551307503131177801003020010000300006020020000500001400741400741150201100990100401001000010000010010000021000104011000111100032331121021139744050000666100002000050100140075140075140075140075140079
70204140074108600000002000014005911139616259010650100300061000040100300001000012371195332221161171921400511400741400761307503131177801003020010000300006020020000500001400741400741150201100990100401001000010000010010000021000200011000111100032101121011139744050000066100002000050100140075140075140075140056140075
7020414007410860000010700001400611113961625901065010030006100004010030000100001237169533222116115033140051140074140156130750313117780100303221000030000602002000050000140074140055115020110099010040100100001000011001000002100011104100031110003210195011142039050000906100002000050100140161140156140244140075140165
70204142721110011001322712640001402481013982811090173501103002210003403923035410155124634853351531613631214018514033814035413080543131385813033056110080302446117420162506301402361403284150201100990100401001000010000010010000021000105011000101000032101121011139744050000667100002000050100140078140075140075140075140076

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0058

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140054108600000001000140039139658259001350010300031000040010301161000012459795333440161155171401291400581400611307571231312178001030020100003000060020200005000014005814005411500211091040010100001000010100000110000001000011000314004874413973050000131013100002000050010140059140059140059140059140055
700241400601086000000010001400331396582590013500103000310000400103000010000124585653334801611551714007314005814003613075731312178001030020100003000060020200005000014005914005411500211091040010100001000010100000110000061000011000314004874413973050000131013100002000050010140059140056140061140059140059
70024140058108500001001300014004313965825900135001030003100004001030000100001245979533359116115634140030140058140058130757313121780010300201000030000600202000050000140058140058115002110910400101000010000101000001100002610000110003140031033313973050000131010100002000050010140059140059140059140059140037
70024140036108500000001000140039139658259001350010300031000040010300001000012459795333440161155171400331401561400581307573131195800103002010000300006002020000500001400571400581150021109104001010000100001010000011000000100000100031630587111213999650032141413100002000050010140353140245140428140249140351
700241425231097101003339717600140140139721109900435004030010100044043530353100791252941533673616134158140316140250140303130806411313708120630390100803024560756203225031114034514033221500211091040010100001000010100000110000101000011000314003873313973050000131013100002000050010140059140059140059140059140059
700241400581086000000060001400431396582590013500103000310000400103000010000124597953335911611551714003014005414006113075731312138001030020100003000060020200005000014005814003611500211091040010100001000010100000110000131000011000314003873313973050000131313100002000050010140059140059140059140059140059
70024140058108600000001000140021139659259001350010300031000040010300001000012459795333591161159741400301400541400541307533131217800103002010000300006002020000500001400581400541150021109104001010000100001010000001000003100001100031400487331397305000013013100002000050010140059140059140037140059140059
700241400571086000000010001400431396582590013500103000310000400103000010000124597953335911611575114009914005514005413075731312178001030020100003000060020200005000014005814005811500211091040010100001000010100000110000001000011010314004904413973050000131013100002000050010140059140059140059140059140059
70024140036108600000001000140043139658259001350010300031000040010300001000012459795332745161155171401071400581400541307573131213800103002010000300006002020000500001400581400541150021109104001010000100001010000011000010100001100031400487441397305000013013100002000050010140059140037140059140037140059
700241400581085000100010001400431396622590013500103000310000400103000010000124585653335911611346014008214005814003613075731312178001030020100003000060020200005000014005814003611500211091040010100001000010100000010000001000011000314004873313972650000131313100002000050010140059140059140059140059140060

Test 4: throughput

Count: 8

Code:

  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6], x8
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd alu (9a) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40020580042621011000000310000611800271600026320236801001600798000080100160000800004446522375768099836708002380042800422992026329999320100200800001600002001600003200008004280042112402011009910010080000160000800001008000772380026041068001861252360151120316558003918000099080000320000801008004380043800438004380043
400204801846200100100003100001180027106042632011280100160093800008010016018080000444652237576809983560800238004280042299004329999320100200800831600002001600003200008004280042112402011009910010080000160000800001008000672380027010258010261252370151120616548015018000099080000320000801008004380043800438004380043
40020480042620011000000310000118002716600263201198010016007980000801001600008000044464413757704998454880023800428004229904433008232010020080000160000200160000320000800428004211240201100991001008000016000080000100800087080108040108001961252361151120316558003918000099080000320000801008004380043800438004380043
40020480042621010020000320000271800271060226320222801001602788000080100160000800004446566375768099841378002380184800422992403299993201002008000016000020016000032000080042800421124020110099100100800001600008000010082000723800260501780019617061151120316538003918000099080000320000801008004380043800438004380043
400204801856200100000003101001618002716603726320133801001600908000080100160000800004447142375727299833978002380042800422992020329999320100200800821600002001600003200008004280042112402011009910010080000160000800001008000682380008050328001861252371151370516558003918000099080000320000801008004380183800438004380043
4002048004262101000000070000761801691660023032016880100160077800008010016016580000444552037571859983560800238004280042299042032999932010020080000160000200160000320000801778004211240201100991001008000016000080000100800077238002703029800180162370151120716538003908000099080000320000801008004380043800438004380043
4002048004262101101000037000010280027160002632017280100160097800008010016000080000444607637574799983386800238004280042299040329999320100200800831600002001600003200008004280042212402011009910010080000160000800001008000862380025010258001961252370151120516458003908000099080000320000801008004380184800438004380043
4002048004262001000000031000010180174166002632020180100160061800008010016000080000444698237576809983466800238004280042298536329999320100200800001600002001600003200008004280042112402011009910010080000160000800001008000770800250280258001961252360151120416548003918000090080000320000801008004380043800438004380043
40020480042621011000000310000161800271660026320177801001600928000080100160000800004446076375748499844528002380042800422991138329999320100200800001600002001600003200008004280042112402011009910010080000160000800001008000762380026020268001961252360151120416538003908000099080000320000801008346784059834908043580043
40020480042620010010000460000118002716600211320114801001600758000080100160000800004443936375771399835358002380042800422989920329999320100200800001600002001600003200008004280042112402011009910010080000160000800001008000862380025001288001861262370151120516358003918000099080000320000801008004380043800438004380043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd alu (9a) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4000258004265210000000023000169800271600026320010800101600668000080010160000800004446669375768999839021310800238004280042299471333002132001020800001600002016000032000080042800421124002110910108000016000080000010800000198001600000800176017190001505755301992427231158800391800002431380000320000800108004380043800438004380043
400024800426200000000002100007800270660026320070800101600618000080010160000800004446669375752699839221110800238060680042299470330021320010208000016000020160000320000800428004211240021109101080000160000800000108000002180016000017800176014210001505758321911173125121010800390800002821380000320000800108004380043800438004380043
40002480042620000000000230001578002716600263200108001016004580000800101600008000044466693757676998233712108002380042800422994703300213200102080000160000201600003200008004280042112400211091010800001600008000001080000008000000000800006115210101505958312110172924128980039080000267080000320000800108004380043800438004380043
4000248004262000000000022000088002716600263200108001016006180000800101600008000044466693757676998389512108002380042800422992803300213200102080000160000201600003200008004280042112400211091010800001600008000001080000008001600001680016611421000150595830191117312412111080039080000265080000320000800108004380043800438004380043
400024800426200000000002300010780027066002632001080010160000800008001016000080000444666937576769983290121080023800428004229947033002132001020800001600002016000032000080042800421124002110910108000016000080000010800000198000000000800166002100015061552918101729231185800391800002451380000320000800108004380043800438004380043
400024800426200000000002200010880027106042632006980010160000800008001016000080000444666937575269983893111080023800428004229947033002132001020800001600002016000032000080042800421124002210910108000016000080000010800000218001600001780016611621000150635829181017272311117800390800002451180000320000800108004380043800438004380043
4000248004262100000000023000088002706600263200578001016004580000800101600008000044466693757703998251512108002380614800422994703300213200102080000160000201600003200008004280042112400211091010800001600008000001080000021800000000380017011500001505964301810172924128880039080000245080000320000800108004380043800438004380043
400024800426200000000000000177800270660026320010800101600558000080010160000800004446143375770399832901210800238004280042299470330021320010208000016000020160000320000800428004211240021109101080000160000800000108000002180017000008000060141900015059552918101731231191080039080000233080000320000800108004380043800438004380043
4000248004262000000000020000078002710003263204208001016000080000800101600008000044460203757676998410313108002380042800422994703300213200102080000160000201600003200008004280042112400211091010800001600008000001080000021800160000080015611721000150645830199173125131011800390800002801080000320000800108004380043800438004380043
400024800426200000000002300007800271060026320055800101600458000080010160000800004446669375767699832901210800238004280042299470330021320010208000016000020160000320000800428004211240021109101080000160000800000108000002180000000017800146114210001505955301911162723111011800391800002651080000320000800108004380043800438004380043