Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2R (2D)

Test 1: uops

Code:

  ld2r { v0.2d, v1.2d }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.004

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.006

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
63005294042201901600000240104609287940017144300620061000200010005000238512227422895929214310300010002000100020002915029128116100110001000110003100000011000303013043922869043096650206453096382316505328391162251397814982100020002935329248292762925929272
6300429359219210150000030104576287611017182300420061000200010005000238845228302916329266310300010002000100020002912329161116100110001000010002100100001000202012908930968993111949205943069382214475028446163991411215093100020002924329246293122929029316
630042927221811018000002010464928737111714330062008100020001000500023896222816291322933131030001000200010002000292542916711610011000100001000210000000100231301292393106917306484520667312338218494328408162311396515088100020002929929303291952923229204
6300429268219200180000030104636288160017269300420081000200010005000239144228422907529162310300010002000100020002910329173116100110001000010003100000011000303013004916769103064845205713090382912434428388159941375414905100020002929729374292692924629363
63004292232191701500000210104620287770117206300420061000200010005000238663226612905829253310300010002000100020002914829160116100110001000010003100001001000302012985916568693101843205303060382216485028525162881397614971100020002925729232292582934229377
630042930322018015000003010461028810101725630062006100020001000500023866222793290992932031030001000200010002000292372916411610011000100001000310010000100021301291592116861305765320706308438228435028337162561404514995100020002927429369292762935329365
6300429218220170130000030104669288480117290300620061000200010005000238986227812903529256310300010002000100020002919829113116100110001000010003100000001000202013302927669053107642205743080381914474628348164521365814972100020002931829367293272931529147
6300429258220160170000030104631287511117243300620061000200010005000238610227972902129333310300010002000100020002918829121116100110001000010002100100011001223013050916969033091640205743146382120494728390163171401815214100020002917129224292752928029277
63004292702191801600000301046652875300171723006200610002000100050002389632268429040292983103000100020001000200029127291541161001100010000100021000000010002030129729264683330931147205803081382114414328348162611409415117100020002918929358293542937029235
63004293212191901600000120104881287131017254300620041000200010005000238543228162899929285310300010002000100020002905329134116100110001000010003100000011001222013015918970063179644205723095382011444528425161731384714951100020002933629297291862924429264

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.2d, v1.2d }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0047

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
702051401461048122111026588000140120139521139338129353578010340100300071000030100302951000012669796693388200794510140023014013714004713054301913114970100302001005330000602001005330158140253140102115020110099100401001000010000010010001411000241110000111120321011281113956540000666100002000040100140134140151140154140054140148
70204140157104911011007010114003813959213934512934725801064010030003100003010030000100001264381669368220080361014001701400531400441305630313114970100302001000030000602001000030000140053140053115020110099100401001000010000010010003211000112110000111120321021281113956540026606100002000040100140054140054140150140054140054
70204140153104911010002000114002613959213934512935925801064010030006100003010030000100001264525669368220082149014003001400531400411305610313115270100302001000030000602001000030000140053140053115020110099100401001000010000110010003211000201110000011100321011281113962640000666100002000040100140054140042140054140054140042
70204140041104911111001010114003813959213934512934725801064010030003100003010030000100001264304669368220082149014003201400531400531305610313114970100302001000030000602001000030000140053140053115020110099100401001000010000010010001111000200110000010120321011281113956540000006100002000040100140054140060140054140060140054
70204140053104910100002010114003813959213935512935925801064010030006100003010030000100001264304669309120082149014002901400531400531305610313114970100302001000030000602001000030000140041140053115020110099100401001000010000010010002111000200110000111111321011281113956540000666100002000040100140054140042140042140054140054
7020414004110491011100266000114003813945013934512935925801064010030006100003010030000100001264304669309120082149014002901400531400411305610313114970100302001000030000602001000030000140053140053115020110099100401001000010000010010004211000100110000111100321011281113964240000666100002000040100140054140054140054140054140054
7020414005310491110025273585220010014002013940413933812934125801034010030003100003010030000100001264310669338820079451014002301400471400471305430313111870100302001000030000602001000030000140035140047115020110099100401001000010000010010000001000010010000100000321011531113955940000660100002000040100140036140048140048140048140048
702041400471049000000040100140032139431139338129353258010340116300031000030100300001000012643286692791200812630140073014004714004713053102113131270100302001000030000602001000030000140047140047115020110099100401001000010000010010000011000000010000000000321011531113955940000666100002000040100140048140048140036140048140048
70204140047104900011002010114003813959213934512935925801064010030006100003010030000100001264381669309120082149014002901400411400531306210313111870100302001000030000602001000030000140047140047115020110099100401001000010000010010000011000000010000101000321011531113955940000666100002000040100140048140048140048140048140048
70204140047104800001001000014003213943113933812934125801034010030003100003010030000100001264328669338820081263014002301400471400471305430313111870100302001000030000602001000030000140035140035115020110099100401001000010000010010000011000000010000101000321011281113955940000606100002000040100140048140049140048140048140036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7002514005310491111004710001400411394991393431293562580013400103000310000300103000010000126471766938292008214911400320140056140041130574031311787001030020100003000060020100003000014005614004111500211091040010100001000001010000011000000010000111110314071217413957840000969100002000040010140081140077140051140051140051
700241400501049000000000001400351395501393451293962580013400103000310000300103000010000126477166935352008126311400110140050140035130553031311617001030209100003000060020100003000014004714003511500211091040010100001000001010004211000100110000111120314051215713956540000960100002000040010140077140050140053140051140051
700241400351049000000110001400351395501393451293472580016400103000310000301563000010000126471766938292008214911400320140041140053130574031311817001030020100003000060020100003000014005614005311500211091040010100001000001010002101000202110000111110314071217513957840000606100002000040010140078140046140054140054140057
700241400561049110000400001400411395501393321293622580013400103000610000300103000010000126479466936822008214911400290140056140056130574031311787001030020100003000060020100003000014005614005311500211091040010100001000001010004111000200110000111100314061205613957840000969100002000040010140122140043140054140042140057
700241400561049110101100011400261396081393321293622580013400103000610000300103000010000126479466938292008214911400170140041140041130574031311787001030020100003000060020100003000014005314005311500211091040010100001000001010000011000010010000001000314051214613957240000969100002000040010140125140063140057140057140042
7002414004110491110002100014003813944613934312935625800134001030003100003001030000100001264719669353520081263114002601400501400471305680401312217001030020100003000060020100003000014005014003511500211091040010100001000001010002411000100110000010110314051205713956340000069100002000040010140075140053140048140036140036
700241400501049000000610001400351395511393451293622580013400103000610000300103000010000126479466938292008036111400320140053140041130574031311787001030020100003000060020100003000014004114004111500211091040010100001000001010000011000000010000100000314051214713956640000096100002000040010140083140054140116140057140057
700241400611049101100200001400411394461393251293412580010400103000310000300103000010000126477166935352008169711400260140050140050130568031311757001030020100003000060020100003000014003514004711500211091040010100001000001010003101000101410000110100314051215713956340000969100002000040010140075140052140051140036140048
700241400351049000000110001400321394491393251293562580010400103000310000300103000010000126471966935352008126311400230140051140278130553031311737001030020100003000060020100003000014004714004711500211091040010100001000001010001201000200110000110100314051214713956340000990100002000040010140088140061140051140048140036
700241400501048000010100001400351395501393451293662580016400103000610000300103000010000126471766938292008036101400170140053140053130574031311817001030020100003000060020100003000014004114004111500211091040010100001000011010000011000400110000101000314061207713957240000999100002000040010140097140057140054140042140042

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2r { v0.2d, v1.2d }, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0260

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 1f | 22 | 23 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70205140446105002100014024513977013955212956525801034010030000100003010030000100001266232670361320111930114023614003614026013076703131127701003020010000300006020010000300001402601402601150201100991004010010000100001100100001100000024010000110321011291113977240000101010100002000040100140037140037140261140261140261
702041402601051001000140245139770139326129565258010040100300041000030100300001000012662326692839201119301140012140260140036130532031313557010030200100003000060578100003000014026014026011502011009910040100100001000011001000011000030010004000321011291113977240000101010100002000040100140261140261140261140261140261
702041402601051001010140021139770139552129565258010340100300031000030100300001000012643196692839200795950140236140260140260130767031311277010030200100003000060200100003000014026014026011502011009910040100100001000011001000011000000271000011032101133111397724000010100100002000040100140261140037140037140261140037
70204140260104800100014024513977013955212956525801004010030003100003010030000100001266232669283920111930114023614026014026013053203131355701003020010000300006058010000300001400641402721150201100991004010010000100001100100001100000001000001032102129111397724000001010100002000040100140037140261140261140261140261
70204140260105000100014024513975013955212956525801034010030003100003010030000100001264319670404520111930014001214026014026013076703131355701003020010000300006020010000300001400361402601150201100991004010010000100000100100000100000001000010032101129111397724000501010100002000040100140261140261140261140037140261
70204140260104911100014002113940513932612956525801004010030003100003010030000100001266232670361320111930014023614003614026013076703131127701003020010000300006020010000300001402601400361150201100991004010010000100001100100001100000001000010032101133111395454000001010100002000040100140261140261140261140261140261
7020414003610510000001402451394051395521293422580100401063000010000301003000010000126623266928392011193011402361400631402601307690313135570100302001000030000602001000030000140887140260115020110099100401001000010000010010000110000000100001103210112911139772400001000100002000040100140037140261140261140261140261
70204140260104900001014024513940513955212956525801034010030003100003010030000100001266232669283920111930014023614026014003613076703131355701003020010000300006020010000300001402601400361150201100991004010010000100001100100000100001001000011132101129111395474000010100100002000040100140261140037140261140261140037
702041402601051001000140245139405139552129565258010040100300031000030100300001000012662326692839200795951140012140260140260130767031313557010030200100003000060200100003000014003614026011502011009910040100100001000011001000011000000010000000321011331113954540000101010100002000040100140261140261140037140037140261
702041400361051010010140245139770139326129565258010340100300001000030100300001000012643196703613201119300140236140260140260130532031313557010030200100003000060200100003000014026014026011502011009910040100100001000001001000011000000010000000321013111139545400001000100002000040100140037140037140261140037140037

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0464

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140821113509114001443758093344100014071514073114013713001825800134001030003100003001031770109861313926671037220159098114244601424721400361305543131589700103002010000300006002010000300001404641402841150021109104001010000100001101000111100000001000011000003140001312171713998840000666100002000040010140465140465140037140679140679
7002414046410520000001100100000140449139862139326129342258001340010300001000030010300001000012647286692839201411970140434014046414046413055431315897001030020100003000060020100003000014046414046411500211091040010100001000001010000011000088001000010100003140409121814139984400008506100002000040010140465140037140037140465140466
7002414046410490000000000136000001400211398621397531293422580013400103000310000300103000010000126472867133662007959501404400140678140678131401313158370010302051000030000600201000030000140678140458115002110910400101000010000010100000110001000100001010000314000712181513998440000666100002000040010140465140037140465140037140037
70024140464105300000000001000001404491394881397531297682580013400173000310000300103000010000126853266928392007959501400120140036140036130554313158970010300201000030000600201000030000140036140464115002110910400101000010000010100000010000000100001000000314000812091813955940000066100002000040010140465140040140465140465140037
70024140036104900000000001010001400211394881397531293422580010400103000010000300103000010000126472867133662014119701404400140464140036130980313158970010300201000030000600201000030000140464140464115002110910400101000010000010100000110000000100000000010314000712181413957540000666100002000040010140459140679140465140465140465
70024140458105310010000000010001404491394881393261293422580013400103000310000300103000010000126473767135582014119701400120140464140464130554313116270010300201000030000600201000030000140464140464115002110910400101000010000010100000010000100100000010000314000812081713998740000660100002000040010140465140037140465140465140465
70024140036105200000000001000001400211398621393261297682580013400103000310000300103000010000126853266928392014119701404400140464140464130974313158970010300201000030000600201000030000140036140464115002110910400101000010000010100000010000000100001010000314000812181614019740000606100002000040010140465140037140465140465140037
70024140464105200000000001000001404491398621397531297682580016400103000010000300103000010149127705267057882014119701404400140036140464130980313158970010300201000030000600201000030000140464140036115002110910400101000010000010100000110000000100001010000319400712181714020440000000100002000040010140639140395140039140465140465
70024140464105200000010000010001400211398621397531297682580013400103000310000300103000010000126853266928392007959501400120140464140036130554313116270010300201000030000600201000030000140464140036115002110910400101000010000010100000110000000100001000000314000712081713998740000066100002000040010140037140465140037140037140469
70024140464105000000000001000001400211394881397531297682580013400103000310000300103000010000126853267133662014119701404400140464140036130980313158970010300201000030000600201000030000140036140036115002110910400101000010000010100000110000100100001010001314000712191313998540000606100002000040010140465140037140465140465140037

Test 4: throughput

Count: 8

Code:

  ld2r { v0.2d, v1.2d }, [x6]
  ld2r { v0.2d, v1.2d }, [x6]
  ld2r { v0.2d, v1.2d }, [x6]
  ld2r { v0.2d, v1.2d }, [x6]
  ld2r { v0.2d, v1.2d }, [x6]
  ld2r { v0.2d, v1.2d }, [x6]
  ld2r { v0.2d, v1.2d }, [x6]
  ld2r { v0.2d, v1.2d }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5008

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
240205400633000114100040049080025240100100160082800001001600008000050041900618450940400220400644004199733100172401002008000016000020080000160000400414005911802011009910010080000800000100800000800310008003561313751102162240056140800001600001004006540065400654006040060
24020440065300000370004005010100025240100100160070800001001600008000050041900654399940400450400414006499733999924010020080000160000200800001600004006440041118020110099100100800008000001008000037800001035800356131051102162240061014800001600001004006540065400604006040060
240204400413000004100140049100002524018210016008280000100160000800005004184261845094140022040064400649973310022240100200800001600002008000016000040041400591180201100991001008000080000010080000378003500318003161310511021622400611414800001600001004006040042400654006540065
24020440064300000370014002688002524010010016000080000100160000800005004189734560014140045040041400419973310022240100200800001600002008000016000040041400591180201100991001008000080000010080000418003100358003560313751102162240061100800001600001004004240065400424006040060
2402044006430010041001400268800252401701001600008000010016000080000613417950184509414004504006440041997339999240100200800001600002008000016000040064400591180201100991001008000080000010080000418003500358003561310511021632400381410800001600001004006040065400654004240065
24020440041300000370004004988002524010010016000080000100160000800005004184031845094140045040041400599973310022240100200800001600002008000016000040064400591180201100991001008000080000010080000378003500358000001313251102162240056140800001600001004006540065400604006540060
240204400413000000100400498800252401821001600708000010016000080000500418403184509414002204006440064997339999240100200800001600002008000016000040064400411180201100991001008000080000010080000378005300358000000314151102162340038014800001600001004006540065400654004240042
24020440059300100500004002681000252401001001600708000010016000080000500417950456001414004604004140064997331001724010020080000160000200800001600004004140059118020110099100100800008000001008000008000000318003100041511021622400561414800001600001004006540065400604006540065
24020440064300000411004004901000252401001001600828000010016000080000500418420456001414004504006440059997331001724010020080000160000200800001600004006440041118020110099100100800008000011008000008003500080035013541511021622400381410800001600001004006540042400604006040060
240204400643000000001400498000252401821001600708025910016000080000500425299231570914004004006440041997331002224010020080000160000200800001600004004140059118020110099100100800008000001008000041800000008003560041511021622400401414800001600001004006540065400654006540065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 24 | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24002540055300100301040040111125240010101600008000010160000800005041877939200140400224004140055999631003524001020800001600002080000160000400414004111800211091010800008000001080000000800240248002461243705020116511400520680000160000104005640056400564005640056
24002440055300000300040026111125240064101600548000010160000800005041874518456740400364005540055999631003524001020800001600002080000160000400554005511800211091010800008000001080261230080024024800246024005020116511400526080000160000104004240056400564005640042
240024400553000003000400401102524001010160000800001016000080000504187451845674040036400554005599963100352400102080000160000208000016000040055400551180021109101080000800000108000003008002402480024610005020116511400526680000160000104005640042400564005640056
240024400553000000104004011025240064101600008000010160000800005041874539200140400364004140055999631002224001020800001600002080000160000400554005511800211091010800008000001080000030080024024800240024005020116511400520680000160000104004240042400564005640042
240024400413000000004004011025240064101600548000010160000800005041851839200140400364004140055999631003524001020800001600002080000160000400554005511800211091010800008000001080000030080024024800246124005020116511400520080000160000104005640042400564005640042
2400244004130001100040026011252400641016005480000101600008000050418745392001404003640055400559996310035240010208000016000020800001600004005540055118002110910108000080000010800000300800240248000061243005020116511400386680000160000104004240042400424005640056
2400244005530000100040026110252400101016000080000101600008000050418518184567404002240055400419996310035240010208000016000020800001600004004140055118002110910108000080000010800000300800240248002461243005020116511400380680000160000104004240056400564005640056
2400244005530000030004004011025240064101600008000010160000800005041874518456740400364005540055999631002124001020800001600002080000160000400414005511800211091010800008000001080000030080024124800006003005020116511400520680000160000104005640056400564005640056
24002440055299000331040040111125240064101600548000010160000800005041874539200140400364005540055999631003524001020800001600002080000160000400554005511800211091010800008000001080000030080000024800000024005020116611400526080000160000104005640056400564005640056
24002440055300001301040040111125240064101600548000010160000800005041874539200140400364005540041999631002124001020800001600002080000160000400554005511800211091010800008000011080000030080024024800006124005020116511400520680000160000104005640056400564004240042