Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (multiple, 2D)

Test 1: uops

Code:

  ld2 { v0.2d, v1.2d }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.006

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.006

retire uop (01)cycle (02)03l1i tlb fill (04)l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss data (0b)0e0f191e1f22233a3f43464951schedule uop (52)schedule simd uop (54)schedule ldst uop (55)dispatch simd uop (57)dispatch ldst uop (58)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f6061696d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map simd uop (7e)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst simd load (98)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0eaebec? ldst retires (ed)? simd retires (ee)f5f6f7f8fd
640052928821951410100060104589288410201716540062006200020002000100002387050022788290682932731040002000200020004000291242907511610011000100012004442004001220024242013424921668533296274203043083381611605628348164811351414975200020002932229298293032939629375
640042925421941010100060004545288380221701140082008200020002000100002391920022916291862934831040002000200020004000291902909811610011000100002003442003001420004262212804918968953097060203593007381314605128333163131348715127200020002928929413293852938029347
640042939921961110000080004703287870201711440082008200020002000100002391270022767291102933831040002000200020004000290992906611610011000100002002242006000220024242012949916868653023256202963090381212605128442164841352115014200020002935029215293342932129330
640042939021961010111062132004552289160001714540082008200020002000100012388320022818290892927831040002000200020004000292082912411610011000100002003362002000220024462212980916869513073259201783126381119645728410163271354315135200020002930729347292822927629181
640042928121871010100080004544287560001714040082008200020002000100002390440022761291492923531040002000200020004000291332916311610011000100002004242002000220014842112898906169223079454203663090381615575328374164851354514922200020002932229289292222923429326
640042926022071010111010000465228832000169974008200820002000200010003239244002284729137292023284000200020002000400029111292031161001100010000200334200200148520004442012953908869273066065202693059381212636428372165371337115137200020002922929141292882920329309
6400429383219310001000801045782890422116975400620082000200020021001023890100229372908029277171040002000200020004000291872914611610011000100002002342002000520006262213099921268923146358202233119381210625928734161211357614828200020002929729175292002929129284
640042925921981000011060004532288510221717740082008200020002000100002388620022822290912934531040002000200020004000291262919511610011000100002004242004001520004242112973913469073104258201963086381318535828427163711330215019200020002930229352292802924329310
640042921522041010101060004486288100201699840062010200020002000100002389930022801290412923731040002000200020004000291832921911610011000100002004242006000620004462013068911669083111170202933059381111616328495164851333514961200020002930129380293012930529209
640042932221971010000070004668288230001719040082006200020002000100002392450022872289822941931040002000200020004000290982913221610011000100002003242004000220004442013053924468803151162202263088381710596628372163921345214890200020002932829271292322933929286

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.2d, v1.2d }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0047

retire uop (01)cycle (02)03l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f2223243a3f4d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
8020514005110490011002010001400368477512968625901034010030000200003010030000200001204418566920681218277911400111400511400841295053129934801003020020000300006020020000500001400511400361150201100991004010010000100000100200002200000002000022200003210216111397914000010010200002000040100140036140052140052140036140052
80204140051104900000000100014003684775129686259010340100300132000030100300022000012043796669187612182602014002314004714004712952061299258010230203200043000560206200045000914004714004711502011009910040100100001000001002000022000000020000200011132171160013980340000006200002000040100140048140048140048140048140048
80204140047108500113232409727280100142882958421309538399057340332301312006432726327902155012164382677075012310277014156414166114205412997931299188010030200200003000060200200005000014004714004711502011009910040100100001000011002000022000020020000202000032101161113978740000666200002000040100140049140048140048140048140048
802041400471049000000140010014003284771129688259010340100300002000030100300002000012043837669187612182337114002314004714004712950131299328010030200200003000060200200005015514013914005111502011009910040100100001000011002000022000710320000200000032101161113978740000666200002000040100140048140038140049140048140048
802041400391099000000140010014003284771129682259010040100300002000030100300002000012043837669129012182337014002314004714003512948931299308010030200200003000060574200625000014004714004711502011009910040100100001000001002000022000000020000200000032101161113978740008066200002000040100140050140048140048140036140048
80204140035104900003127888000014003284771129682259010040100300002000030100300002000012043837669187612181242114002314004714004712950131299308010030200200003000060200200005000014004714003511502011009910040100100001000001002000022000000020000202000032101161113978740000666200002000040100140048140036140036140048140036
80204140047104900000020010014003280130129682259010340100300002000030100300002000012043837669187612181242014002314004714004712950131299308010030200200003000060200200005000014004714004711502011009910040100100001000001002000022000000020000002000032101161113978740000066200002000040100140048140048140048140048140048
80204140048104900000000000014003284771129682259010340100300032000030100300002000012042832669187612182337014002314003514003512950131299298010030200200003000060200200005000014003514004711502011009910040100100001000001002000022000010320000002000032101161113978740000006200002000040100140036140036140048140048140048
80204140047104900000060010014003280132129682259010340100300032000030100300002000012043837669187612181242014002314004714004712950131299308010030200200003000060200200005000014004714004711502011009910040100100001000011002000002000000020000202000032101161013978740000666200002000040100140048140102140076140050140048
80204140047104900000000010014003284771129682259010040100300032000030100300002000012042832669187612182337014002314003514004712950131299308010030200200003000060200200005000014003514003511502011009910040100100001000011002000022000000020000202010032101161113978740000666200002000040100140048140048140048140036140036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0047

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss data (0b)0e0f181e22233a3f4d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
80025140047104900000000200014003284771129682259001340010300032000030010300002000012043387669192412182517114002314009614004712959131300228001030020200003000060020200005000014003514004711500211091040010100001000001020000022000001020000202000031409168913978140000666200002000040010140054140054140054140054140054
80024140053104910111000810114003884777129688259001640010300062000030010300002000012043909669216412183051014007314004714004712959131300208001030020200003000060020200005000014004714004711500211091040010100001000001020000022000000020000202000031408168713978740000666200002000040010140050140048140048140048140052
800241400471049000000102100140032847711296702590013400103000320000300103000020000120433876691290121825170140023140047140047129591271300208001030020200003000060020200005000014004714004711500211091040010100001000001020000022000000020000202000031409168813978740000666200002000040010140048140048140048140048140048
800241400471049000000002100140032847711296822590013400103000320000300103000020000120433876691876121825170140023140047140035129591313002080010300202000030000600202000050000140048140047115002110910400101000010000010200000220000000200002020000314010168913978740000666200002000040010140048140048140048140048140048
80024140047104900000000210014002084771129682259001340010300032000030010300002000012043387669187612182517014002314003514004712959131300208001030020200003000060020200665000014004714014011500211091040010100001000001020004002000000020000202000031409168713978740000666200002000040010140048140048140048140036140048
80024140047104900000000210014003284771129682259001340010300032000030010300002000012043387669187612182517014002314004714004712959131300208001030020200003000060020200005000014004714003511500211091040010100001000001020000022000000020000202000031407168813978740000660200002000040010140048140036140048140048140048
800241400471049010000102100140310847711296822590013400103000720000300103000020000120728256691876121825170140023140047140389129591521300208001030020200003000060206200005016414057314004711500211091040010100001000001020000022000000020000202000031401016131113978740056666200002000040010140048140048140048140577140048
800241401361049000000001700014003284771129682259001340010300032000030010300002000012043387669187612183563014002314004714004712959131300208001030020200003000060020200005000014004714003511500211091040010100001000001020004002000000020000202000031408168913978740000666200002000040010140048140048140048140048140048
800241400471048000000002100140032847711296822590010400103000320000300103000020000120433876691876121825171140023140047140047129591313002080010300202000030000600202000050000140047140047115002110910400101000010000010200000220004045020000202000031409168713978740000666200002000040010140048140048140048140048140048
80024140047104900000000200014003284771129670259001340015300032000030010300002000012043387669187612182517014001114004714004712959131300208001030020200003000060020200005000014004714004711500211091040010100001000001020000022000000020000202000031408168713978740000666200002000040010140048140048140048140048140048

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.2d, v1.2d }, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01)cycle (02)03090e0f1e1f22233a3f4d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)acafb5l1d cache miss ld nonspec (bf)c2branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
8020514005110491002401001400428477512967025901034010030000200003010030000200001204283266921161218269311400110140055140054129509312993880100302002000030000602002000050000140055140035115020110099100401001000010000010020000020000002000020003210116111397954000001110200002000040100140036140036140056140056140056
8020414005110490000000114003684779129690259010340100300032000030100300002000012042832669206812182693014001101401211400381295093129918801003020020000300006020020000500001400551400351150201100991004010010000100000100200002200001020000220032101161113977540000101410200002000040100140056140056140036140056140036
8020414003510490002010114008984775129690259010340100300032000030100300002005012050139669226012181242014002701400511400511295053129934801003020020000300006020020000500001400551400511150202100991004010010000100000100200000200001020000220032101161113977540009101414200002000040100140056140036140036140052140052
802041400551049000201001400368477912969025901004010030000200003010030000200001204453366922601218269301400270140035140055129509312993880100302002000030000602002000050000140055140051115020110099100401001000010000010020000020000002000002003210116111397754000001410200002000040100140107140097140056140036140036
802041400351049000000001400788013012969025901034010030003200003010030000200001204283266922601218124201400310140035140035129489312993880100302002000030000602002000050000140051140035115020110099100401001000010000010020000020000002000000003210116111397754000001010200002000040100140036140052140052140036140036
802051400551049000001001400208013012969025901004010030000200003010030000200001204453366922601218124201400310140055140055129509312992480100302002000030000602002000050000140055140035115020110099100401001000010000110020000020000002000002003210116111397914000010010200002000040100140036140056140036140056140036
80204140051104900000000140132801301296702590103401003000320000301003000020000120445336692260121812420140031014003514003512950531299188010030200200003000060200200005000014005514005111502011009910040100100001000001002000022000000200002000321011611139797400000100200002000040100140052140036140056140036140036
802041400551049000200011400378477912968625901034010030003200003010030000200001204283266920681218269301400110140035140035129509312993880100302002000030000602002000050000140051140051115020110099100401001000010000010020001220000002000022003210116111397954000010140200002000040100140056140056140056140036140056
80204140055104900020000140076801301296872590100401003000320000301003000020000120441856691290121812420140011014005514005512948931299348010030200200003000060200200005000014003514003511502011009910040100100001000001002000022000000200002200321011611139775400000010200002000040100140056140036140056140056140056
8020414003510490002000014003680130129686259010040100300032000030100300002000012042832669129012183049014003101400551400351295093129918801003020020000300006020020000500001400551400511150201100991004010010000100000100200000200000320000020032101161113979540000000200002000040100140041140056140056140052140036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f1e22243a3f4d4f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)acafb5l1d cache miss ld nonspec (bf)branch cond mispred nonspec (c5)branch mispred nonspec (cb)cfd2d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
80025140051104900000610114002680138129676259001640010300032000030010300002000012044257669158812183407114003714006114006112960531300248001030020200003000060020200005000014005114005121500211091040010100001000001020000020000032000020003221421611139801400001400200002000040010140062140042140062140062140062
8002414006110491110041011400208477512967025900134001030003200003001030000200001204232266920681218142211400271400511400511295793130008800103002020000300006002020000500001400511400511150021109104001010000100000102000022000003200000000314051161113979540000141414200002000040010140107140077140036140052140056
80024140055104900000000114004080130129670259001340010300002000030010300002000012044083669226012182873014001114003614005512959931300288001030020200003000060020200005000014005514005111500211091040010100001000001020000020000002000002003140421611139795400000100200002000040010140056140036140056140056140056
8002414005510490000001011400408013012967025900104001030003200003001030000200001204373566920681218142211400271400551400351295953130028800103002020000300006002020000500001400551400351150021109104001010000100000102000002000000200002200314041161113977540008101010200002000040010140036140056140052140056140052
800241400511049000002000140040847791296902590013400103000320000300103000020000120437356691290121828731140011140055140055129599313002480010300202000030000600202000050000140055140051115002110910400101000010000010200002200001020000000031404116111397754000010010200002000040010140052140053140052140052140052
8002414005110490000021001400208477512968625900134001030000200003001030000200001204408366922601218287311400311400551400551296003130008800103002020000300006002020000500001400551400351150021109104001010000100001102000022000000200002200314052161113977540000000200002000040010140036140036140056140052140036
800241400551049000000000140020847791296902590013400103000020000300103000020000120423226692068121828730140105140055140055129635313002880010300202000030000600202000050000140055140051115002110910400101000010000010200002200000020000020031404116111397914000014100200002000040010140036140036140036140036140036
8002414005510490000020001400208477912967125900134001030003200003001030000200001204408366920681218287311400311400551400351295993130028800103002020000300006002020000500001400511400511150021109104001010000100000102000022000000200002200314051161113979140000101010200002000040010140052140052140036140036140052
8002414005110490000021011400208477912967025900134001030000200003001030000200001204408366920681218287301400301400351400351295993130024800103002020000300006002020000500001400511400511150021109104001010000100000102000022000000200000200314051161113979540000101010200002000040010140052140052140036140052140052
8002414007910490001132100140040847801296862590010400103000320000300103000020000120423226692068121828730140031140055140035129579313000880010300202000030000600202000050000140035140051115002110910400101000010000110200002200000020000220031404116111397754000010014200002000040010140056140052140054140036140052

Test 4: throughput

Count: 8

Code:

  ld2 { v0.2d, v1.2d }, [x6]
  ld2 { v0.2d, v1.2d }, [x6]
  ld2 { v0.2d, v1.2d }, [x6]
  ld2 { v0.2d, v1.2d }, [x6]
  ld2 { v0.2d, v1.2d }, [x6]
  ld2 { v0.2d, v1.2d }, [x6]
  ld2 { v0.2d, v1.2d }, [x6]
  ld2 { v0.2d, v1.2d }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f1e1f22233a3f4346494f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0e7eaebec? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
320205800695990010003800028002620532532015210016005216000010016009016000050080037419210521800228004180041032332010020016000016000020016000032000080041800411180201100991001008000080000010016000003516003200321600326132350000510911711800380131321600001600001008004280042800428004280042
32020480041599101001580002800262563253201541001600561600001001600001600005008013721922540080022800418004103233201002001600001600002001600003200008004180041118020110099100100800008000001001600141343160054005216003961514312200510911711800380131321600001600001008004280042800428004280042
32020480041600110000570000800262121202532013810016003816000010016000016000050080037419210520800228004180041032332010020016000016000020016000032000080041800411180201100991001008000080000110016000003516003200321600326132350010510911711800381101001600001600001008004280042800428004280042
32020480041600000000380000800262121202532013810016003816000010016000016000050080037719210820800228004180041032332010020016000016000020016000032000080041800411180201100991001008000080000010016000003516003200321600326132350000510911711800381101001600001600001008004280042800428004280042
32020480041600000000380100800262121202532013810016003816000010016000016000050080037719210941800228004180041032332010020016000016000020016000032000080041800411180201100991001008000080000010016000003516003200321600326132350000510911711800381101001600001600001008004280042800428004280042
32020480041599000000380000800262121202532013810016003816000010016000016000050080034219210640800228004180041032332010020016000016000020016000032000080041800411180201100991001008000080000010016000003516003200321600326132350000510911711800381101001600001600001008004280042800428004280042
320204800416000000003800008002621212025320138100160038160000100160000160000500800377192108208002280041800410252332010020016000016000020016000032000080041800411180201100991001008000080000010016000003516003200321600326132350000510911711800381101001600001600001008004280042800428004280042
32020480041600000000380000800262121202532010010016003816000010016000016000050080037719210700800228004180041032332010020016000016009220016000032000080041800411180201100991001008000080000010016000003516003220391600326132400000510911711800381101001600001600001008004280042800428004280042
32020480041600000000380000800262121202532013810016003816000010016000016000050080037719210860800228004180041032332010020016000016000020016000032000080041800411180201100991001008000080000010016000003516003200321600326132350000510911711800381101401600001600001008004280042800428004280042
3202048004160000000038010080026212120433201381001600381600001001600001600005008003771921402080022800418004103233201002001600001600002001600003200008004180041118020110099100100800008000001001600000351600320032160032613200000510911711800381101001600001600001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03l1i tlb fill (04)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)0e0f18191e1f2223243a3f4346494f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd load (98)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)a5ld unit uop (a6)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cfd0d5map dispatch bubble (d6)d9ddfetch restart (de)e0e7eaeb? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
320025800555990000010003801000800262121202532004810160000160000101600001600005080037419211041800220800418004103233200102016000016000020160000320000800418004111800211091010800008000011016000003501600320032160032613235000501901117012128003811010160000160000108004280042800428012880042
32002480041600000000000380100080026212120253200481016003816000010160000160000508003741920000080022080041800410333320010201600001600002016000032000080041800411180021109101080000800000101600000350160032103216003261323500050190121701278003811010160000160000108004280042800428004280042
3200248004159900000000038010008002621212025320048101600381600001016000016000050800377192106408002208004180041032332001020160000160000201600003200008004180041118002110910108000080000010160000035816003200016003261323500050190111701111800381100160000160000108004280042800428004280042
32002480041600000000000380100080026212120253200481016003816000010160000160000508003771921100080022080041800410323320010201600001600002016000032000080041800411180021109101080000800000101600000350160032003516003261323500050190121701178003811010160000160000108004280042800428004280042
32002480041600000000000380100080026212120253200481016003816000010160000160000508003771921064080022080041800410323320010201600001600002016000032000080041800411180021109101080000800000101600000350160032003216003261323500050190141709138003811010160000160000108004280042800428004280042
320024800415990000011003801000800262121202532001010160038160000101600001600005080037719210640800220800418004103233200102016000016000020160000320184800418004111800211091010800008000001016000003501600320032160032613235000501901117011108003811010160000160000108004280042800428004280042
320024800415990000010003801000800262121202532004810160038160000101600001600005080037719211180800220800418004103233200102016000016000020160000320000800418004111800211091010800008000011016000003501600320032160032613235000501901017010108003811010160000160000108004280042800428004280042
320024800416000000000004701000800262121202532004810160038160000101600001600005080037719210300800220800418004103233200102016000016000020160000320000800418004111800211091010800008000011016000003501600320032160032613235000501909170107800381100160000160000108004280042800428004280042
3200248004159900000000038010008002621212025320048101600381600001016000016000050800377192106418002208004180041033032001020160000160000201600003200008004180041118002110910108000080000110160000035016003200321600326132350005019071706128003811010160000160000108004280042800428004280042
32002480041600000000000380100080026012120253200481016003816000010160000160000508003721921052080022080041800410323320010201600001600002016000032000080041800411180021109101080000800000101600000350160032003216003261035001501901317011118003811010160000160000108004280042800428004280042