Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (post-index, 8H)

Test 1: uops

Code:

  ld2r { v0.8h, v1.8h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.004

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.004

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
63005289572330290033000102010046692852500116878400410002000100010002000100050005000238020522761287142890331040001000200020002000289922877611610011000100001000001000000100101300013243943868643093158120347321538122867682828910001600912792139231000200010002903728931289112897328913
63004290082330280026011000010046302854200016931400010002004100010002000100050005000238680922783289242941431040001000200020002000288452891911610011000100001000031001003100011200013200955969183141157520202320638121671742835010001581712739140121000200010002889728926289272899528852
630042900623202600320001040100458528547101168154004100020081000100020001000500050002386701222759287932901431040001000200020002000287562873611610011000100001000001003100100013000013250939669073039117420345320238172069712838610001569912718140681000200010002890928985289832890328856
63004289452330310127011000010046992854600016864400810002000100010002000100050005000237980422710287672889731040001000200020002000288472883711610011000100011000001001001100023300013081940269443105146920220320038161473712831210001581212700137821000200010002903529009290592895328918
63004290512330240025000002010046392854500116838400410002000100010002000100050005000238320722726287132894331040001000200020002000288222873011610011000100001000001003002100100200013220948269263136157320282323938161969742830710001569812811139341000200010002891128954289742895628908
63004290562330340028000003000047922855301016879400810002004100010002000100050005000238540722705287512896331040001000200020002000289492880011610011000100001000021000000100313000013213930369113119166520368320538112870702833310001566212685139281000200010002902529048289852887829008
63004290092330310026000000010047762856200017052400410002008100010002000100050005000238840722745288942885131040001000200020002000288192891411610011000100011000031000000100001000013154937369673121116820246318338163074732844410001562412750137881000200010002902528878289062896128838
630042890023202500240000016000047272856501116901400010002000100010002000100050005000238560522727288632898331040001000200020002000289002885511610011000100011000001001001100001300013215944669323149138020120324138192281752828910001571112806138841000200010002894428979289702894529045
63004289862330250029000002010047262845600116770400810002004100010002000100050005000238680522754287522915331040001000200020002000290122893711610011000100001000021001000100003200013361950769143103147420166319938181877722838410001560712894140281000200010002893128915290332895728917
63004288752320240033010000010047272852501016871400810002006100010002000100050005000238410622703288372891631040121000200020002000288682878111610011000100011000031000004100110200013177931969503113166920291320838172169842830910001564812536142731000200010002895428996289752896328937

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.8h, v1.8h }, [x6], x8
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0055

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 23 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140053112511100000110011400381395952590103501003000610000401003000010000123699953315111611504614003001400571400541308273131156801003020010119302416020020000300001400561400531150201100991004010010000100001100100022110001001100000111132101801113971350000666100002000050100140411140396140054140410140054
702041400551125100111001401014004613963725901065010030003100004010030000105481236999533150916114812140029014005714009613075331311618010030200100003000060200200003012114004514005711502011009910040100100001000001001003821100011041000011010323211211113972350000660100002000050100140054140150140137140055140055
7020414014611291201001015400114004113962425901065010030010100014010030000100001240800533143316125092140033014005714012013141131311568010030200100003000060200200003000014005314005511502011009910040100100001000011001000221100021171000011110321011211113972350000666100002000050100140042140054140057140054140057
70204140043113410000000170101400261395952590103501003000610000401003000010000123698153314711611572614003201400661400561307293131156801003020010000300006020020000300001400531400531150201100991004010010000100000100100011110001101100001111240311801113972350000666100002000050100140054140157140054140054140149
702041400561132100210002300014011913959825901035012830003100004010030000100401236981533198116119706140019014022514005713072931311858039130320100003012260200200003000014014914005611502011009910040100100001000001001000520100011041000011112321011211113972650000669100002000050100140042140155140057140195140042
7020414005611291012000028710114012513959553901465010030006100004024130000100391237044533154716114812140032014005314041913077331311888010030200100003012160200200003000014004114004111502011009910040100100001000001001000111100020011000001011321011211113972350000960100002000050100140042140054140042140054140054
7020414005311251001000020011400381396142590103501003000610000401003000010000123699053315471611530514002901401561400571307293131156801003020010000300006020020000300001400411400561150201100991004010010000100000100100011010001004100001101132101801113972550000066100002000050100140058140057140054140054140158
702041400581125101000001710114003913961425901065010030006100004010030000100001236981533045916116077140029014009714006913073031311568010030200100003000060200200003000014004414005311502011009910040100100001000001001000110100010011000001111321011211113972350000000100002000050100140055140054140055140054140055
70204140053112511000000510014003813961425901035010030003100004010030000100001236981533242316115726140029014012714006513071831311568010030200100003000060200200003000014005314005311502011009910040100100001000001001000221100010111000001110321011211113971350000000100002000050100140056140054140054140055140055
702041400411125110000001710114003813959525901065010030003100004010030000100001236981533147216114812140029314012214005913073231311578010030200100003000060200200003000014004114005411502011009910040100100001000001001000211100020111000011010321011211113972650000906100002000050100140054140055140057140057140054

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
700251400531085000001000101000140035139656259001650010300061000040010300001000012458475333173161147790014002801400531400501307463131215800103002010000300006002020000300001400571400471150021109104001010000100000101000311100010041000010100314002087228139722500001069100002000050010140080140066140057140057140057
7002414005010850000000001300000140032139656259001650010300061000040010300001000012458795333285161147790014002601400501400501307343131194800103002010000300006002020000300001400501400471150021109104001010000100000101000121100010001000000000314002087222113970750000900100002000050010140108140057140042140057140057
7002414003510850000000001300000140035139666259001650010300031000040010300001000012458475339713161147790014002601400471400501307343131194800103002010000300006002020000300001400521400351150021109104001010000100000101000510100031061000010100314002087212113972250000999100002000050010140062140098140057140042140051
700241400501085000000000100000140123139653259001350010300031000040010300001000012459075333285161147790014002601400511400501307463131209800103002010000300006002020000300001400501400501150021109104001010000100000101000110100000001000010100316302087192013970750000900100002000050010140061140058140057140051140036
700241400501085000101010101000140035139650259001350010300001000040010300001000012459165333285161147790014002601401471400351307493131209800103014010000300006002020000300001400501400511150021109104001010000100000101000201100000001000000100314001587202013970750000099100002000050010140067140057140054140055140057
70024140056108600000000110000014008313965625900165002030006100004001030000100001245879533317316114779001400260140050140050130749313120980010300201004030000600202000030000140050140047115002110910400101000010000010100031110002013100000000031400208720713972350000909100002000050010140074140057140057140057140059
700241400501086000000000100000140035139656259001650010300061000040010300001000012483085333285161147790014002601400501400351307493131232800103002010000300006002020000300001400501400472150021109104001010000100000101000211100010001000011110314002087152013971950000909100002000050010140064140147140057140042140051
700241400501085000000000180100014010713966325900135001030003100004001030000100001245907533328516119915001400930140050140050130747313120980010300201004930000600202000030000140050140047115002110910400101000010000010100011110000003100000010031400208772013972250000060100002000050010140101140059140057140148140074
70024140050108500000000010100014002013965625900135001030003100004001030000100001245907533328516114779001400270140050140035130749313120980311300201000030000600202000030000140050140050115002110910400101000010000010100021010000103100001010031400108721813972350000900100002000050010140090140059140051140146140048
7002414003510860000000112650000114031713963411090073500203001710003408603343311188131204053593981612529900140239014034214033913083229131395809093038610120303686075420160304841402341403154150021109104001010000100000101000611100040063661000300100321001715225813987250000969100002000050010140059140054140057140058140060

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.8h, v1.8h }, [x6], x8
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0081

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140085108600000020001400401396232590103501003000310000401003000010000123725953326011611836201400601400841400841307603131189801003020010000300006020020000300001400841400811150201100991004010010000100000100100001100000000100001100321011211113975450000212124100002000050100140085140085140085140085140085
70204140084108600000020001400691396232590103501003000010000401003000010000123725953330361611801101400601400841400841307603131187801003020010000300006020020000300001400841400811150201100991004010010000100000100100001100000003100001100321011211113975450000242424100002000050100140086140056140085140085140085
70204140084108600000020001400401395972590103501003000310000401003000010000123725953326011611498001400601400841400841307603131187801003020010000300006020020000300001400841400811150201100991004010010000100000100100001100000000100001100321011211113975450000242124100002000050100140085140085140087140056140085
70204140084108600000020001400691395972590103501003000310000401003000010000123708353324871611498001400601400841400841307573131187801003020010000300006020020000300001400841400811150201100991004010010000100000100100001100000000100001100321011211113975450000242124100002000050100140082140085140056140056140085
702041400841086000000110001401401396262590103501003000310000401003000010000123726853326011611498001400601400841400551307313131158801003020010000300006020020000300001400841400811150201100991004010010000100000100100001100000000100001100321011211113972550000242124100002000050100140082140056140085140085140085
70204140084108500000020001400691396262590100501003000310000401003000010000123725953326391611498001400601400841400861307623131158801003020010000300006020020000300001400841400811150201100991004010010000100000100100001100000000100001100321011211113975450000242424100002000050100140085140085140085140085140056
70204140084108600000020001400401396232590100501003000310000401003000010000123724153326011611801101400601400841400841307603131187801003020010000300006020020000300001400841400871150201100991004010010000100000100100001100000000100001100321011211113975450000212424100002000050100140085140086140085140085140085
70204140084108600000020001400691396262590103501003000310000401003000010000123725953326011611801101400601400841400811307613131187801003020010000300006020020000300001400841400811150201100991004010010000100000100100001100000000100001100321011211113975450000242424100002000050100140085140085140085140085140085
702041400841086000000200014006913962625901035010030003100004010030000100001237268533260116118011014006014008414005513076031311878010030200100003000060200200003000014008114005511502011009910040100100001000001001000011000000031000011003210112111139754502682535921100002000050100140085140090140087140086144972
702041427491125600000110956401140069139626259010350100300031000040100300001000012372325331499161153190140060140084140085130771313118880100302001000030000602002000030000140084140081115020110099100401001000010000010010000110000000010000110032101121111397555000024024100002000050100140085140089140104140438140184

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0058

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140053108500000004010140043139639025900105001030003100004001030000100001245979533374816124874001400341400581400581307573131217800103002010000300006002020000300001400541400551150021109104001010000100000101000501100060001645510006110003256002872213973450000131010100002000050010140037140059140059140055140059
7002414005810860000000600014002113965802590013500103000010000400103000010000124585653335911612265800140012140036140058130757313121880010300201000030000600202000030000140058140036115002110910400101000010000010100000110000000010000100003140002873313972650000131310100002000050010140059140059140059140059140059
7002414005810860000000000014003913965902590013500103000010000400103000010000124599753335911612189200140034140058140058130757313121980010300201000030000600202000030000140058140054115002110910400101000010000110100000110000020010000100003140002873313973150000131313100002000050010140055140059140059140059140039
70024140058108600000001300014004313965802590013500103000310000400103000010000124597953335911612310700140012140036140036130757313119580010300201000030000600202000030000140054140058115002110910400101000010000010100000110000040010000110003140002872213973050000131013100002000050010140059140059140059140059140060
7002414005810890000000100014009213965802590013500103000310000400103000010000124597953335911611570810140034140054140058130735313121780010300201000030000600202000030000140058140054115002110910400101000010000010100000110000010010000110003140003873313982150000131313100002000050010140055140059140038140055140059
70024140036108600000001000140044139658025900135002030003100004001030000100001245988533359116129841001400341400581400361307591513121880010300201000030122600202008030000140060140054115002110910400101000010000010100000010000030610000110003140003873313973050000131313100002000050010140059140059140156140059140037
70024140058108600000001300014004313966102590013500103000310000400103000010000124597953335911611551700140034140147140058130757313121580010301401000030000600202000030000140058140054115002110910400101000010000010100000110000050610000110003140002873313973350000101310100002000050010140037140059140059140037140059
7002414003610860000000100014004313966002590013500103000310000400103000010000124597953335911612257000140034140058140058130757313121380010300201000030000600202000030000140058140058115002110910400101000010000010100000010000010010000110003140003872213972950000131313100002000050010140159141825140068140172140050
7002414005210850000000100014003713965202590010500203000310000400103000010000124592553333611612062200140028140052140052130751313120880010300201000030000600202000030000140057140049115002110910400101000010000010100030110002000322510002110013777002872213972450000869100002000050010140135140037140124140152140112
700241401471087000001013300014033213966702590028500103000310000400103000010000124585653353751611475000140029140052140052130751313121480010300201000030000600202000030000140052140049115002110910400101000010000110100000110000000310000110003140002872313972450000069100002000050010140053140056140050140053140053

Test 4: throughput

Count: 8

Code:

  ld2r { v0.8h, v1.8h }, [x6], x8
  ld2r { v0.8h, v1.8h }, [x6], x8
  ld2r { v0.8h, v1.8h }, [x6], x8
  ld2r { v0.8h, v1.8h }, [x6], x8
  ld2r { v0.8h, v1.8h }, [x6], x8
  ld2r { v0.8h, v1.8h }, [x6], x8
  ld2r { v0.8h, v1.8h }, [x6], x8
  ld2r { v0.8h, v1.8h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24020580041621000000200080026066020032014080100160220825438374416756583059440820737583759826355180022800418004149924034999932010020080000160000200160288160000800418004111802011009910010080000800000100800790398001001016800146114180005110116118003818000096080000160000801008004280042800428004280042
2402048004162000000020008002616602532014080100160040800008010016000080000440821537583729826337080022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800000148001400014800136110180005110116118003808000096080000160000801008004280042800428004280042
24020480041620000000190080026166025320140801001600408000080100160000800004408215375836198263550800228004180041499240349999320100200800001605302001600001600008004180041118020110099100100800008000001008000001880808020246580103619180005110116118003808000096080000160000801008004280042800428004280042
2402048004162000005356741250160800261600253201408010016003080000801001600008000044082073758372982629208002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000110080000018800130001380014010180005110116118003808000090080000160000801008004280042800428004280042
240204800416000000002000800261660253201408010016000080000801001600008000044082153758372982633718002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080000018800140001780014619180005110116118003818000099080000160000801008004280042801678004280042
24020480041599000000190080026166025320140801001600008000080100160000800004408215375837798263531800228004180041499600349999320100200800001600002001600001600008004180041118020110099100100800008000001008000001480013000080014619180005110216118003818000099080000160000801008004280042800428004280042
2402048004159900000019008002610602532014080100160000800008010016000080000440821537583759826343180022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800000148001400014800136113180005110116118003818000099080000160000801008004280042800428004280042
240204800415990000002000800260660253201408010016004080000801001600008000044082053758374983574518002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000010080000008001300017800146110180005110116118003818000096080000160000801008004280042800428004280042
240204800416210000002000800261604253201388010016003880000801001600008000044082153758372982633708002280041800414992403499993201002008000016000020016000016000080041800411180201100991001008000080000110080000018800130001380014610180005110116118003818000099080000160000801008004280042800428004280042
2402048004162000000022008002616602532013280100160038800008010016000080000440820637583809826593080022800418004149924034999932010020080000160000200160000160000800418004111802011009910010080000800000100800000148001401014800136114180005110116128003808000099080000160000801008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk data (08) | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2400258004162010000002600080026166025320058800101600468000080010160000800004407696375837698257508002208004180041499470315002232001020800001600002016000016000080041800411180021109101080000800001080000018801151014800140118220502064515448003818000001080000160000800108004280042800428004280042
24002480041621001000000008002616642532005880010160048800008001016000080000440769637583769825732800220800418004149947035002232001020800001600002016000016027280041800411180021109101080000800001080000018800170014800006114005020804155480144180000111080000160000800108004280042800428004280042
24002480041620000000024001800261660253204428001016040080000800101602658000044076963758237982574180217080328801925000420585002232001020802691600002016080016026580180804642180021109101080000800001080097018800170211908010761142205038803156480038080000131380000160000800108004280182801828004280183
24002480041620000001101760080308166068320056801971600508000080276160000801344407696375838098259918002208017980041499750350022320010208000016000020160000160000800418004111800211091010800008000010800000228001700148001861142205020503154580038180000131080000160000800108004280042800428004280042
240024800416200011000320008002606602532005080010160048800008001016000080000440769637583789825747800220800418004149947035002232001020800001600002016000016000080041800411180021109101080000800001080000018800181017800146114220502053416538003818000013080000160000800108004280042800428004280042
24002480041620001000023000800261660253200588001016004680000800101600008000044076963758361982574180022080041800414994703500223200102080000160000201600001600008004180041118002110910108000080000108000001880018002080018611424050202803165380038080000131080000160000800108004280042800428004280042
2400248004162100000002300080026066025320050800101600408000080010160000800004407696375838098260168002238004180041499470350022320010208000016000020160000160000800418004111800211091010800008000010800000188001800188001861182205020504154480038180000131380000160000800108004280042800428004280042
2400248004162100000002400080026166025320050800101600488000080010160000800004407661375837898257628002208004180041499470350022320010208000016000020160000160000800418004111800211091010800008000010800000228001820218001861131805020503164480038180000131380000160000800108004280042800428018680042
24002480041621000010024000800261664253200588001016004880000800101600008000044076963758376982574580022080041800414994703500223200102080000160000201600001600008004180041118002110910108000080000108000001880017101880018610005020506154480038180000131080000160000800108004280042800428004280042
240024800416200000000240008002616649253200588001016004680000800101600008000044076893758361982574180022080041800414994703500223200102080000160000201600001600008004180041118002110910108000080000108000001880000100800186118220502050315448003818000001380000160000800108004280042800428004280042