Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (post-index, 4H)

Test 1: uops

Code:

  ld2r { v0.4h, v1.4h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.006

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.006

retire uop (01)cycle (02)03l1i tlb fill (04)l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f1e223a3f464951schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f696d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map simd uop (7e)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)l1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
63005292892262150310050470828614011718340061000200810001000200010005000500123908122274129190293303104000100020002000200029138292901161001100010001002331002011100031211129319286691731081372055131483808940342840210001606913403144261000200010002924929272294222925129168
6300429209227011001104045622878600171204008100020081000100020001000500050002391652278429041292493104000100020002000200029188292061161001100010001002321002011100031311134549245690431131402058432233811643372852310001627613084146371000200010002937729249292462926729199
63004293052260110110040461928723001710040081000200810001000200010005000500023908622758290692929431040001000200020002000291662924711610011000100010022310020111000313101299692416992320504220596327838111345412846310001603013198143481000200010002919629384291692923029342
6300429332228010110005046532892000172004006100020061000100020001000500050002391142276129126292763104000100020002000200029424297421161001100010001002131003011100021211130739233694431300362071534293808843352869310001638913152145501000200010002960229587292502922929415
6300429443227010101003046062872000171534008100020081000100020001000500050002388042273029003291613104000100020002000200029103290581161001100010001002231003002100022311129439241690731100392043731533813738422838510001609513419143841000200010002914229302292772915029299
63004296902280111010020462928767101707240061000200810001000200010005000500023900722682291142923231040001000200020002000291622909311610011000100010012310020021001213111283391546880309013820548303538171040352835110001650613312145211000200010002913029163292742918829272
63004292942180101101140455928753011740140081000200810001000200010005000500023872522744292682933331040001000200020002000292382955911610011000100010032210020111001222121292491546839306503720665307438161037342835810001643313437144271000200010002923229225292472918229133
63004292302190111011140457328766001713440061000200610001000200010005000500023896822764290842920531040001000200020002000290962908611610011000100010021210030021000223111287390506899311903520553303938161039382839810001630113299145341000200010002930429273293022928329255
6300429199220011111104045342872210171984008100020061000100020001000500050002389852276829011292533104000100020002000200029122291941161001100010001002131003012100122210128709144692431211412066530713811740392835210001641913235145301000200010002924429218292252929329204
6300429166218010100113045182874800171254008100020061000100020001000500050002389922272629054292513104000100020002000200029146290671161001100010001001221003011100121311130139431685731020352060530533814746412837810001648013310144471000200010002928029245292102918229264

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.4h, v1.4h }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f23243a3f4d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)6061696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd0d2d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
70205140057108600000010016000014003913957725901035010030003100004010030000100001236903533135716114560001400301400541400351307313131154801003020010000300006020020000300001400541400541150201100991004010010000100000100100000010000943100000010000032100011211113972450000131014100002000050100140052140052140055140052140053
70204140051108500000010013000014003913959325901005010030000100004010030000100001236990533135716114560151400291400541400511307273131157801003020010000300006020020000300001400541400512150201100991004010010000100000100100000110000700100001010000032100011211113972450000131013100002000050100140036140055140055140036140066
70204140035108500000000010000140039139593259010350100300031000040100301391122112993185331357161145600014003014003514005513073031311578010030200100003000060200200003000014003514005121502011009910040100100001000001001000101100005800100011010000032100011211113972150011131010100002000050100140036140036140142140036140073
7020414005410850000000000000014004013959625901035010030003100004010030000100001236963533147416114560001400301400541400541307303131157801003020010000300006020020000300001400541400511150201100991004010010000100000100100000110001640010000101020003210001121111398115000001010100002000050100140053140038140055140055140049
7020414005410850000100011000014003913959625901035011030003100004010030000100001236999533135716114560001400301400541400541307303131157801003020010000301236020020000300001400581400351150201100991004010010000100000100100000110000823100001010000032105411211113972450000131313100002000050100140055140052140055140055140084
7020414005410860000000001000014003913959625901035010030007100004010030000100001236963533147116114560151400271400541400511307303131157801003020010000300006020023150355651424451400541150201100991004010010000100000100100030110000000100001010000032100011211113972450000131013100002000050100140052140112140056140057140040
70204140051108600000000013000014013613961025901035010030003100004010030000100001236903533151016111598001401071400351400541307293131164801003020010000300006020020000300001400351400351150201100991004010010000100000100100000110000460010000101000003210001121111397255000001313100002000050100140055140055140055140144140080
70204140054108500000000010000140132139596259010350100300001000040100300001000012369905331471161190800014009814003514005513073531311578042530200100003000060200200003000014005414005411502011009910040100100001000001001000001100008503100001010000032105411211113972450000131010100002000050100140055140054140036140148140085
70204140051108500000000013000014003613959325901035010030003100004010030000100001236990533147116114560001400301400541400351307313131157801003020010000300006020020000300001400511400511150201100991004010010000100000100100000110000840010000101000003210541121111398015001001013100002000050100140055140036140153140055142717
7020414032210880010000243697264000014040513966783901805012430019100044039930236101581242133533757216122268001401431402381403341308123013138580713305661008130363604482024430367140237140375315020110099100401001000010000010010003011000613703100001000000032105411211113973650000131010100002000050100140052140053140052140055140058

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0054

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f23243a3f494d5051schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)91inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch mispred nonspec (cb)cfd0d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
700251400521085000000001000014002001396510259001350010300031000040010300001000012458745332706161155260140027014005114003513075031312108001030020100003000060020200003000014005414005111500211090104001010000100000101001321100040101610810008101000314007875513972350000131013100002000050010140055140055140055140057140058
7002414005410850000000010000140038110139657051900135001030003100004001030000100001245916533344616115998014003101400571401441307563131213800103002010000300006051220000300001400521400511150021109010400101000010000010100000110000000110000101000314063876513972350000131010100002000050010140055140036140055140055140036
70024140054108500000001100001400390139660638259001350010300031000040010300001003812459435332823161133460140011014005114005113075031312168001035164104863000060020200003000014005414005111500211090104001010000100000101000001100050101381000010100031860587351397255000001014100002000050010140052140036140055140036140148
70024140054108600000100100101400390139635025900135001030007100004001030000100001245943533270616113346014002701400511400511307503131211800103002010000300006002020000300001400511400511150021109010400101000010000010100000110000000010000101000314006875513972650010101010100002000050010140053140184140058140057140053
7002414005410850000000014400101400420139651025900135001030003100014001030000100001245934533332716113346014002701400541400541307503131213800103002010000300006002020080300001400541400511150021109010400101000010000010100000110000000010000100000314008876613972450000131010100002000050010140055140055140053140055140052
70024140035108500001000100101400390139651025900135001030011100014001030000100001245961533430716163421014003001400511400561307533131210800103002010000300006002020080300001400511400351150021109010400101000010000110100000110000000310000100000314005875713972650000101011100002000050010140058140058140062140059140058
700241400601086111000001400101400260139641025900165001030006100004001030000100391245997533366916115841014003601400411400611307273131219800103002010000300006002020000300001401551400581150021109010400101000010000010100021110002010110000111110314005875513972950000101010100002000050010140061140061140106140058140059
70024140154108610110000134001014004201396350259001050010300001000040153300001000012459435333441161133460140030014003514005113075031312108001030020100003000060020200803000014005414005111500211090104001010000100000101000001100000003100001010003140078743139707500001000100002000050010140055140036140036140052140055
70024140035108500000000101001400200139654025900135001030000100004001030000100001245916533364116115297114002701400571400541307533131329800103002010000300006002020080300001400541400351150021109010400101000010000110100000010000000010000101000314005878713972350000131510100002000050010140052141472140147140233140317
70024140145108701010122397176100140231013970308190046500343001110002404333035310038125070853380901611988801401760140055141198131431313121380313300201004030122602602000030120140144140035215002110901040010100001000001010001011000100732331000210000031400687671397235000010130100002000050010140036140052140036140036140052

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.4h, v1.4h }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0063

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f2223243f4d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst simd store (99)inst ldst (9b)9d9e9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5l1d cache miss ld nonspec (bf)c2branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
702051400691125000000013000014005213956525901035010030003100004010030000100001237083533149916114980140039140063140064130741313115880100302001000030000602002000030000140063140066115020110099100401001000001000000100100000110001023260100011100323318011139735500191009100002000050100140340140221140258140345140227
702041403541127101001339626400014041713977083901505013930021100014053530354100791251232533530116120277140169140447140351130741313115880100302001000030000602002000030000140056140066115020110099100401001000001000000100100000110000201652310000010032101801113973550000999100002000050100140064140064140064140109140067
7020414011211250100010100001400401395652590100501003000310000401003000010000124490253305301611647714003914006314006313073931311688010030200100003000060200200003000014006314005511502011009910040100100000100000010010000011000000010000111032101801113981350000069100002000050100140064140064140056140064140064
702041400631125000000010000140048139607259010350100300031000040100300001000012449115330530161162361400391400631400631307393131207801003020010000300006020020000300001400661400661150201100991004010010000010000101001000001100001018100001100321018011139736500009109100002000050100140071140064140067140056140064
7020414006311250000000100001400401395655490103501003000310000401003000010000124490253305301611670814003114006614006313073931312078010030200100003000060200200003000014006314006611502011009910040100100000100000010010000011000010610000010032101801113973850000699100002000050100140071140383140067140377140064
70204140063112501000002500001400481395682590103501003000310000401003000010000123709653305301611498014003914006314006413074031312078010030200100003000060200200003000014006314006611502011009910040100100000100000010010000001000000610000110032101801113995750000999100002000050100140056140064140064140064140068
70204140066112400000001300001400481396072590103501003000310000401003000010000124490253319171611623614004214006614006313073131312078010030200100003000060200200003000014006314006311502011009910040100100000100000010010000011000010010000110032101801113973550000999100002000050100140066140064140064140064140064
70204140091112400000001300001400481395972590103501213000310000401003000010000124490253305301611498014003914005514006313073131312078010030200100003000060200200003000014005514006811502011009910040100100000100000010010000011000000010000110032101801113973550000960100002000050100140064140064140067140064140064
702041400661125000000021000014004013959725901035010030003100024010030000100001244902533149916116236140039140063140063130740313116880100302001000030000602002000030000140064140066115020110099100401001000001000000100100020110007009346100051020328013293114170450049999100002000050100140251140344140250140246140252
70204140416112700000312642640001402291395682590100501003000010000401003000010000123708353306081611544814003914006314005513073931312078010030200100003000060200200003000014006314006111502011009910040100100002610000021001000020811000060310000110032331801113973550000069100002000050100140162140064140064140064140067

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0058

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)0f18191e1f23243a3f4d51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch call (8e)inst branch taken (90)91inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5l1d cache miss ld nonspec (bf)c2cfd0d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
7002514005410860101001000014004313965925900135001030003100004001030000100001245979533366816115517014010301401541401591307573013126880605317271108631325607422032430122140427140339215002110090104001010000100001101001721100050106520100031123210031025814154250020131313100002000050010140148140264140434140232140333
700241403501088001013265264000140043139658259001050010300031000040010300001000012459795333591161134600140034014010614005813073531312178001030020100003000060020200003000014005614005411500211009010400101000010000010100000110000010010000110314005876613973150000131013100002000050010140059140059140037140059140055
70024140058108600000000000140043139658259001350010300031000040010300001000012459795333591161161770140020014003614005813076231312178001030020100003000060020200003000014005814005411500211009010400101000010000010100000110000010310000110314004874413973050000131013100002000050010140059140059140059140059140037
70024140036108500000010000140021139658259001350010300001000040010300001000012458565333591161157510140034014005814005813075731312178001030020100003000060020200003000014006014005811500211009010400101000010000010100000110000010610000110314002875413970850000131014100002000050010140059140059140060140059140059
700241400581085000000000001400461396362590013500103000310000400103000010000124597953335911611551701400340140058140061130757313121780010300201000030000600202000030000140060140058115002110090104001010000100000101000001100000000100001103140071453314015150000131013100002000050010140059140059140059140038140059
70024140058108610105588358400014004313965825900135001030003100004001030000100001245997533359116115517014003401400581400581307593131217800103002010000300006002020000300001400581400541150021100901040010100001000001010000011000000001000001031400287331397305000001013100002000050010140037140059140059140060140060
70024140054108600000010000140043139658259001350010300031000040010300001000012459795333591161155170140012014005814005813075731312178001030020100003000060020200003000014005814005411500211009010400101000010000010100000110000000010000110314003873513973050000101313100002000050010140059140062140059140059140059
70024140058108700000000000140021139654259001350010300001000040010300001000012459795333591161134600140034014005914006113075731312178001030020100003000060020200003000014005814005411500211009010400101000010000010100000110000010310000010314003875413973050000131313100002000050010140037140059140060140059140059
70024140106108600000010000140043139658259001350010300031000040010300001000012459975333630161155170140255014034214024813085029131354809053038310080303646050620158302431402431403543150021100901040010100001000001010025011002610271085100001003210051426413995550040131013100002000050010140337140244140432140253140251
70024140308108700100010000140043139658259001350010300031000040010300001000012459795333591161134600140034014005814005813075731312188001030020100003000060020200003000014006114005411500211009010400101000010000010100000110000010010000110314003872413973050000131313100002000050010140059140059140059140059140059

Test 4: throughput

Count: 8

Code:

  ld2r { v0.4h, v1.4h }, [x6], x8
  ld2r { v0.4h, v1.4h }, [x6], x8
  ld2r { v0.4h, v1.4h }, [x6], x8
  ld2r { v0.4h, v1.4h }, [x6], x8
  ld2r { v0.4h, v1.4h }, [x6], x8
  ld2r { v0.4h, v1.4h }, [x6], x8
  ld2r { v0.4h, v1.4h }, [x6], x8
  ld2r { v0.4h, v1.4h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f233f4346494f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696d6emap stall dispatch (70)simd prf full (72)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)c2cfd5map dispatch bubble (d6)d9daddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
2402058004164301010100008002610622532010080100160040800008010016000080000440821537583769826340800228004180041499240349999320100200800001600002001600001600008004180041118020110099100100800008000001008000001480010001080013601314051101160011800381800009680000160000801008004280042800428004280042
24020480041643000000020008002616646253201408010016004080000801001600008000044082153758376982634580022800418018249924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800000080014109800106114140511011600118003808000091180000160000801008018480042800428004280042
24020480041644000001029008002606602532013880205160040800008037816000080000441052937619269826337801278018180182499240315005232010020080133160000200163780164026822848244818180201100991001008000080000110080266018801026703543801910113184512712500118014318009411680000160000801008201780042803238032580183
24020480181644001001210492641803061661391143208748028416039280092803811605378013344128193761921984380380022800418018249924163500803201002008000016026520016000016000080183800411180201100991001008000080000110080000208001400138001361100051101160021800381801857980000160000801008004280042800428018380042
240204800416430000000320080026166442532014680201160040800008010016027280000440821537583599834739800228004180041499242035008032010020080000160265200160000160000801818004111802011009910010080000800000100800000188001410980013601018051101160011801431800009980000160000801008004280042801828004280042
2402048004164300000002000800261660253201408010016004080000801001600008000044082153758376982590080022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800000080013001980013611018051101160011800380800009980000160000801008004280042800428004280042
240204800416430000000190080026166025320138801001600408000080100160000800004408215375837698259678002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080000018800140408001461918051101160012800381800009980000160000801008004280432800428043080042
24020480430642000000032008002616052532014680100160000800008010016000080000440819737583799826291801298004180041499240349999320100200800001600002001602721600008004180041118020110099100100800008000001008008901880017002180000611300511011601428003818000013080000160000801008004280042800428004280042
2402048004164300000003500800261064253201488010016004680000801001600008000044082153758375982567380022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800000188001710208001861141805110116001180038180000101380000160000801008004280042800428004280042
24020480041643000110044008002616642532013880100160040800008010016000080000440820937583789826343800228004180041499240349999320100200800001640362121613261600008004180041118020110099100100800008000001008000001480014101782046601018051101240021800380800959680000160000801008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03l1i tlb fill (04)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f22233a3f4346494e4f5051schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)simd prf full (72)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)c2cfd5map dispatch bubble (d6)ddfetch restart (de)e0e7? int output thing (e9)eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
240025800416201000100024010080026160000253200108001016000080000800101600008000044075913758361982628708002280041800414994703500213200102080000160000201600001600008004180041118002110910108000080000110800000080014020080013011418050222152280038080000131080000160000800108004280042800428004280042
240024800416200000110023010080026166000253200508001016004880000800101600008000044076873758366982574108002280041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800000188001700008000060022050201151280038180000131080000160000800108004280042800428004280042
24002480041621000000002001018002606600225320058800101600488000080010160000800004407696375836198257450800228004180041499470350022320010208000016000020160000160000800418004111800211091010800008000001080000018800000001480014601418050222151280038180000101080000160000800108004280042800428004280042
240024800416210000000012010080026166000253200508001016000080000800101600008000044076963758361982574108002280041800414994703500873200102080000160000201600001600008004180041118002110910108000080000010800000188001300013158008861142205022215228003808000010080000160000800108004280042800428004280042
24002480041620000000002401008002616600025320058800101600488000080010160000800004407617375837198262850800228004180041499470350022320010208000016000020160000160000800418004111800211091010800008000011080000018800170001780000611318050222152280038180000131380000160000800108004280042800428004280042
2400248004162000000000001018002600604025320050800101600008000080010160000800004407696375837198257450800228004180041499470350022320010208000016000020160000160000800418004111800211091010800008000001080000018800140000800186102205022115128003818000001080000160000800108004280042800428004280042
24002480041621000011000010180026166000253200568001016004680000800101600008000044076963758361982574108002280041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800000188000000014800136114005022115218003808000001380000160000800108004280042800428004280042
240024800416210000110024010080026066000253200588001016004880000800101600008000044076823758376982574108002280041800414994703500223200102080000160000201600001600008004180041118002110910108000080000010800000080014000080014611318050222153280038080000101080000160000800108004280042800428004280042
2400248004162000000000360011800261660002532005880010160000800008001016000080000440769637583789826277080022800418004149947035002232001020800001600002016000016000080041800411180021109101080000800001108000000800180001780000011822050221151180038180000131080000160000800108004280042800428004280042
24002480041622000001002000118002600600068320010800101600008000080010160000800004407696375837698251540800228004180041499470350022320010208000016000020160000160000801818004111800211091010800008000001080000008001400008001861180050222161180038180000101080000160000800108004280042800428004280042