Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (single, H)

Test 1: uops

Code:

  ld2 { v0.h, v1.h }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.006

retire uop (01)cycle (02)03l1i tlb fill (04)l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f181e22243a3f43464951schedule uop (52)schedule simd uop (54)schedule ldst uop (55)dispatch simd uop (57)dispatch ldst uop (58)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f61696d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map simd uop (7e)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0eaebec? ldst retires (ed)? simd retires (ee)f5f6f7f8fd
63005285642132120011410001310506128299000163543000200210002000100050042384030227542827828264310300010002000100040002827928201116100110001000010032010010103100020300138821017470453413879197013303381012544627865149871227113024100020002814128233282392835228698
630042823621201141020100151050842801900016340300220071000200010005000238189022787282352810731030001000200010004000279952832311610011000100001002201005002510022221213830100527077334865619655339638078565628049139721238314368100020002823728719287432822028333
63004285972140116011810004005082283850001629430062006100020001000500023898002276028538282813103000100020001000400028312281691161001100010000100332100103614100001212138901036472683354454196993388381217595427894152841250413136100020002823528362283812813128268
63004282852141119111800001005172284490011631230062007100020001000500023880402276328082282123103000100020001000400028222286281161001100010000100313100300251001113111340196517212340545019612327538088444827980151981228013871100020002816628294282322865628270
63004281712120119111310002005141279320011634530072007100020001000500023902202281628499282653103000100020001000400028142281421161001100010000100110100400171100122011139581034672923459550195343326381318514927890144991230614142100020002864228193282302810728275
6300428286212012010161000410513727971000162723010200210002000100050002382020227782811328634310300010002000100040002853228357116100110001000010031010040026100322312131641017471943191552195283158381417585528114145141317713187100020002829028638282522856428226
630042839421301181114100010050712840900016338300820081000200010005000238714022831281182837231030001000200010004000281362849911610011000100001002231001000910020131214137101647272334965719539337938117495128000143761262013599100020002834028678282262807528289
6300428267212011600191000170049592812101016297300620061000200010005000238260022758282042838231030001000200010004000285982821011610011000100001003221002001410002201013282967472073416763194983197381110525327813140481260214024100020002832028305283862842328627
63004285662110118102110003005192282870001625030062006100020001000500023878602276328289283893103000100020001000400028195286211161001100010000100232100400116100021311132191008872793213660196263414381810535527810153171267813594100020002829228140285982860928390
630042871121201211015000051052252796600016234300620081000200010005001238782022774281962862331030001000200010004000282322847811610011000100001002321001002810000221213550102986969340064819656320538136524627970143521298713197100020002833428215282632814228349

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.h, v1.h }[1], [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0057

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f2223243a3f4d4e4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
70205140041104911101000020100114004213945713935012936325801064010030003100003010030000100001264210669387820082745114003314005714005713056531311537010030200100003000060200100005000014005714005711502011009910040100100001000001001000841100080004147100001111132102172213956940000101010100002000040100140072140069140058140059140058
702041400571049111010100201001140026139657139332129363258010640100300061000030100300001000012642106693878200827450140033140057140057130565313115370100302001000030000602001000050000140057140057115020110099100401001000010000010010003111000200211000111111321021282213956940000101010100002000040100140097140058140059140058140058
702041400571049111010000101001140042139652139350129363258010640100300061000030100300001000012642106693878200827450140033140057140057130565313115370100302001000030000602001000050000140041140041115020110099100401001000010000010010003101000200111000011111321021282213956940000101010100002000040100140103140060140058140058140058
702041400571049111010000201001140042139652139350129363258010640100300061000030100300001000012642106693878200827451140033140057140057130565313115370100302001000030000602001000050000140058140057115020110099100401001000010000010010002211000200241000011110321021282213956940000101010100002000040100140128140062140058140058140058
7020414005710491010100002010011400421396541393501293632580106401003000610000301003000010000126421066938782008274511400331400571400571305653131153701003020010000300006020010000500001400571400571150201100991004010010000100000100100031110002011110000111103210212822139569400001000100002000040100140064140085140060140060140058
702041400571049111010000201001140042139655139350129363258010640100300061000030100300001000012642106693091200827451140033140041140060130549313115370100302001000030000602001000050000140057140057115020110099100401001000010000010010001311000100111000011111321021282213956940000101010100002000040100140065140058140058140058140058
702041400571049101010000201000140042139652139350129363258010640100300031000030100300001000012642106694022200827450140033140057140041130565313115370100302001000030000605841000050000140057140057115020110099100401001000010000110010014111000200111000011111321021282213956940000101010100002000040100140096140042140058140058140058
702041400571049101000000201001140042139652139350129347258010640100300061000030100300001000012642106693878200827450140033140057140057130565313115370100302001000030000602001000050000140057140057115020110099100401001000010000010010005111000200211000011011321021282213956940000101010100002000040100140110140061140058140058140058
70204140057104911001000020100014004213965213935012936325801064010030006100003010030000100001264210669387820082745114001714005714005713056531311537010030200100003000060200100005000014004114005711502011009910040100100001000001001000321100030011100001111132102128221395544000010100100002000040100140152140058140058140058140058
70204140041104911101000020100114004213965213935012936325801064013230003100003010030000100001264210669387820082745014003314004114005713056531311537010030200100003000060200100005000014004114005711502011009910040100100001000001001000211100010011100001111032102128221395694000010010100002000040100140136140046140058140058140058

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01)cycle (02)03090e0f1e1f22233a3f4d4e4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)int prf full (71)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5l1d cache miss ld nonspec (bf)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cfd0d5map dispatch bubble (d6)ddfetch restart (de)dfe0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
700251400731089100100011400201394871393431293562580010400103000310000300103000010000126477166935352008126311400261400501400501305530313117370010300201000030000600201000050000140050140047115002110910400101000010000010100000010000000100001100131486212011813956940000666100002000040010140054140036140061140050140049
700241400351049010001011400321394481393251293532580013400103000310000300103000010000126471966933882008126311400111400501400351305530313116170010300201000030000600201000050000140035140047115002110910400101000010000010100000110000000100000100031486112111813955740000969100002000040010140114140052140048140036140051
7002414005010490001010114002013944613934312935325800134001030003100003001030000100001264748669353520081263014002614005014005013056803131175700103002010000301896002010000500001400501400351150021109104001010000100000101001001100002033100001120031486112011813955740000069100002000040010140054140051140036140048140077
7002414003510490001010114003213948713934312941925800104001030003100003016030000100001264748669353520081697014001114005014005013056873131161700103002010000300006002010000500001400351400941150021109104001010000100000101000000100000012100001000031486112111813956940000960100002000040010140036140036140048140048140048
700241400471049000000011400351394461393251293412580010400103000310000300103000010000126471966933882008126301400231400351400471305650313117370010300201000030000600201000050000140047140047115002110910400101000010000110100000010000000100001100031486112111813955740000669100002000040010140037140048140051140051140051
700241400501049000101011400351394911393381293562580013400103000310000300103000010000126477166935352008720901400151400501400351305530313117370010300201000030000600201000050000140035140047115002210910400101000010000010100000110000000100000100031486112111813956940000666100002000040010140051140051140036140048140036
700241400501049000400011400201394461393251293562580013400103000010000300103000010000126474866933882007945101400111400351400501305530313117570010300201000030000600201006350000140047140035115002110910400101000010000010100000010000000100000100031486112111813955740000909100002000040010140051140048140036140051140036
700241400501049000601011400351394461393381293412580013400103000310000300103000010000126474866927912008169701400231400471400471305650313117370010300201000030000600201000050000140050140047115002110910400101000010000010100000110000000100001100031486112011813956940005096100002000040010140036140051140048140048140048
700241400501049000101011400201394461393431293412580013400103000310000300103000010000126477166935352008126301400111400351400501305680313117370010300201000030000600201000050000140050140047115002110910400101000010000110100000010000100100001000031486112011813957240000666100002000040010140036140051140036140048140048
700241400351049000101011400351394911393431293412580013400103000610000300103000010000126487066927912008169711400111400471400501305650313116170010300201000030000600201000050000140050140035115002110910400101000010000110100000110000000100001100031486112111813957240000090100002000040010140051140036140051140036140036

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.h, v1.h }[1], [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0260

retire uop (01)cycle (02)030e0f18191e1f223f4d4e4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)acafb5l1d cache miss ld nonspec (bf)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cfd0d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
702051406821050000010114024513977014017512956525801034010030003100003010030000100001266232669283920111930140236014026014026013139131313557010030200100003000060200100005000014008514026011502011009910040100100001000011001000001000000100001100321001129111403964000013100100002000040100140888140888140888140906140888
702041402601049110010114087214043914017512934225801004010030000100003010030000100001272055670261620111930140863014088714088713139131319827010030200100003000060200100005000014026014026011502011009910040100100001000001001000001000000100001100321001133111403964000013013100002000040100140261140888140037140888140888
7020414088710490000100140872139770139326130189258010340100300031000030100300001000012662326692839200795951408630140036140036131391313112770100302001000030000602001000050000140887140260115020110099100401001000010000010010000110000001000011003210011711139772400000100100002000040100140037140533140358140433140261
7020414026010510112436192014024514043913932613018925801004010030000100003010030000100001264319669283920079595140863014026014026013053231311277010030200100003000060200100005000014026014003611502011009910040100100001000011001000011000013100001100321001129111403964000013013100002000040100140310140889140404140839140296
702041400361052004160014024513977013932612956525801034010030003100003010030000100001264319673363120079595140236014026014026113053231311297010030200100003000060200100005000014026114026011502011009910040100100001000011001000011000010100001000321001133111403964000010100100002000040100140261140268140261140889140889
702041408871050000010014087214043914017513018925801034010030003100003010030000100001272055669283920202004140863014003614026013139131319827010030200100003000060200100005000014026014026011502011009910040100100001000001001000001000000100001100321001133111403964000013130100002000040100140685140037140037140888140037
7020414088710510000100140021140439140175129342258010040100300031000030100300001000012720556692839200795951402360140887140260131391313112770100302001000030000602001000050000140887140887115020110099100401001000010000010010000110000001000011003210011291114039640000131313100002000040100140888140888140037140888140888
702041408871050000000014087214043914017512934225801034010030003100003010030000100001272055673363120202436140863014003614088713139131319827010030200100003000060200100005000014088714003611502011009910040100100001000011001000011000000100001000321001133111395454000013013100002000040100140037140261140888140888140037
702041402601050000060014087214043913932613018925801034010030003100003010030000100001264319673363120111930140863014003614046913053231319827010030200100003000060200100005000014088714026011502011009910040100100001000011001000011000000100000100321001129111397724000010130100002000040100140261140037140888140038140243
702041408871049110010014002113940513932612934225801034010030003100003010030000100001272055669283920111930140863014088714003613053231311277010030200100003000060200100005000014088714026011502011009910040100100001000011001000011000000100000100321001129111403964000001010100002000040100140888140037140037140037140037

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0260

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f191e1f22243a3f4d4e4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
70025140532105000100101010114047213948813955212934225800104001030000100003001030000100001266662670361320112362114023914026014026013077731311627001030020100003000060020100005000014026014026011500211091040010100001000001010001211000101110000111110314051222414024140000101010100002000040010140683140683140683140683140683
70024140036105100000000010114066814023513997212998525800164001030003100003001030000100001270465672382120140295114065814068214068213119731318067001030020100003000060020100005000014068214068211500211091040010100001000001010000011000000010000101000314021222414026640000101010100002000040010140683140683140683140683140716
700241406821054100100010010114066713971613955212956525800134001030003100003001030000100001266662670361320111930114023614026014026013077731313847001030020100003000060020100005000014026014026011500211091040010100001000001010002001000000010000001000314051224214020140000101010100002000040010140683140683140683140683140683
700241404581053110000000101140667139716139552129565258001340010300031000030010300001000012666626703613201119301140236140260140260130777313138470010300201000030000600201000050000140260140260115002110910400101000010000010100000110001202710000001000314021222414020140000101010100002000040010140261140261140037140261140261
70024140036105100000001010014024513971613955212956525800134001030003100003001030000100001266662670361320111930114023614026014026013077731313847001030020100003000060020100005000014026014026021500211091040010100001000011010000011000000010000101000314021224214024740000101010100002000040010140727140635140683140683140261
70024140261105000000001010114098814023513979912972825800164001030006100003001030000100001270465672382120172574114023614026014026013077731313867001030020100003000060020100005000014026014003611500211091040010100001000001010000011000000010000101000314041224213978140000101010100002000040010140261140261140261140037140037
70024140260105000001001010014024513948813955212956525800134001030003100003001030000100001266662670361320111930114023614026014026013077731313847001030020100003000060020100005000014032714026411500211091040010100001000001010000011000000010000101000314021222413978140000101010100002000040010140261140261140261140261140261
70024140260105100000001010014066714023513932612934225800134001030003100003001030000100001266662670361320111930114023614026014026413077731313847001030020100003000060020100005000014068214068211500211091040010100001000001010001311000200110000101000314021224413978140000101010100002000040010140442140261140261140261140261
7002414003710500000002107810810014024513971613955212956525800134001030003100003001030000100001266662670361320111930114023614026014026013077731313847001030020100003000060020100005000014026014026011500211091040010100001000001010000011000020310000101000314021225213978140000101010100002000040010140261140262140261140455140261
700241406391051000000013010014024513971613955212956525800134001030003100003001030000100001266662670361320111930114023614026014026013077731313847001030020100003000060020100005000014026014026011500211091040010100001000001010000011000010310000101000314021203413978140000101010100002000040010140261140261140261140261140261

Test 4: throughput

Count: 8

Code:

  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.h, v1.h }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.6263

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f1e2223373a3f4346494e51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)rob full (74)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst simd alu (9a)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0e7eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
400205501293760010002510206150073201002624033110016026480008100160022800125004000711922723150103500675006709699240134200800121600222008001232004450102500981124020110099100100800001600008000001008001411080047004480035614437112111151173164450097099800003200001005015150099501425014250142
4002045009637511010050109415006820101226240501100160236800081001600228001250040007119317651500785009850098096442401342008001216002220080012320044500825006711240201100991001008000016000080000010080014113780015024480002614337111111151174163350102009800003200001005012650106500835010150131
40020450082375101000110017405016001000262403441001604768000810016002280012500400071192570205004850186501000426962401342008001216002220080012320044500765009711240201100991001008000016000080000010080013133780015004380002611137110111151173162450102099800003200001005006850119501065010350198
4002045009837511110010009215005220105272401491001600888000810016002280012500400299191523205011250089501270156742401342008001216002220080012320044501055006711240201100991001008000016000080000010080013113780046004380002614337110111151173163350095099800003200001005007150097500815009850248
40020450103375100000500011615008321010026240155100160307800081001600228001250040029619191141500485009850067036614324013420080012160022200800123200445006850108112402011009910010080000160000800000100800111108004501448003360100110000151105163750094099800003200001005012250068500825012950083
4002045009637510011110004290501350100026240567100160228800001001600008000050040024819261761500795009850098074355240100200800001600002008000032000050067501191124020110099100100800001600008000011008001211080044004780000614437100000151105164550093099800003200001005015150142502265006850070
40020450119375111000110012005012601010026240135100160218800001001600008000050040024219177251500635009850225015324240100200800001600002008000032000050098500871124020110099100100800001600008000011008001212378004502448003361430111000151104164450071090800003200001005012650124501285006850099
40020450117375000000320010505014201010026240100100160312800001001600008000050040006719273271500695010050088003242401002008000016000020080000320000500975007711240201100991001008000016000080000110080000023800000008002660190000001511041656501060010800003200001005009950068501165012550103
40020450081375000000410019315007301010026240364100160000800001001600008000050040000019257561500955014450106047334240100200800001600002008000032000050094501061124020110099100100800001600008000001008000000800260025800266120290000015110416355065601010800003200001005012450139500685010850115
4002045011437500000032002371500992100426240498100160000800001001600008000050040000019169671500795010650096003242401002008000016000020080000320000500965011411240201100991001008000016000080000010080000008002610258002661200000001511041634500980610800003200001005009650068500685008950102

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.6259

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f1e1f22373a3f4346494e51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f6061696b6d6erob full (74)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst simd alu (9a)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cdcfd0d2l1i cache miss demand (d3)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0e7? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
40002550129375110105000102350063010100292401231316000080000131600008000068400060191508821050097050075501062136324001320800001600002080000320000501505038911240021109101080000160000800000108001213378004500043800336042371100150292510821311412817950057031620080000320000105006150059501015008050108
400024500733750000031009945007621000292403761316016280000131600008000068400067192064831050088050090500604835724001020800001600002080000320000500605039711240021109101080000160000800000108000000800000000800250020230001503025108112916149716500650312610280000320000105007650045501065010850107
4000245004437600000310017235007420108292404471016034880000101600008000068400067191449631050072050060500917335824001020800001600002080000320000500605036211240021109101080000160000800000108000002380026010268002561192900015030251081429148691450041031166280000320000105004550098500595009050061
4000245012537400000320167450066210100292403341316033880000101600008000050400242192135031050124050090500588331582400132080000160000208000032000050062503821124002110910108000016000080000010800000080019000258001900192900015029281091031128881550057031260180000320000105008850092501055010650144
4000245007137500000890126245004521010029240013131603628000013160000800006840006719226831105008805011650094032324001320800001600002080000320000500605036711240021109101080000160000800000108001010080045001118004901100111015033311310103114107141050120101366280000320000105011750120500835005950117
40002450090375000003201396500450100029240183131602688000013160000800006840024219183262105007205009050060036024001320800001600002080000320000500905040511240021109101080000160000800001108000000800260002580026610000015034311310931281010131450100031819280000320000105010150074500595009850098
4000245004637411100100005500452101002924034413160000800001016000080000684000681914496110500880500585012203232400102080000160000208000032000050060506581124002110910108000016000080000010800000238000000008000000192900015033311310831141010813500650318210080000320000105006150059501035005950071
40002450044375000000010450066010002924001313160196800001016000080000504001351933972210500880500605009231344240013208000016000020800003200005004450366112400211091010800001600008000001080000023800260002880019012529000150343113108291210761450086031426180000320000105009050059500885005950082
40002450073375000002601224550084200029240298131601608000013160000800006840012619210011105003905011950124035824001020800001600002080000320000500605036211240021109101080000160000800000108001211378004600143800006111371100150323113107292016916850055031466180000320000105006150106501125005950095
400024500613750000031002195500910101017292407821316030280000131600008000068400059191251811050072050133500607531052402132080000160000208000032000050119503291124002110910108000016000080000010800000238002500008000060202900015030311310729261091313501140312610280000320000105006150106500985007750061