Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD2 (multiple, 8B)

Test 1: uops

Code:

  ld2 { v0.8b, v1.8b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.006

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.006

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6300529341220161800314668288441117105300620041000200010005000238985227142906829286310300010002000100020002919429195116100110001000010002100101100020212861934869323136646204663212382015464828363163161413014839100020002928829232293002924529218
63004292452191318003146072879411171863006200410002000100050002379852266829027292983103000100020001000200029140290871161001100010000100021001111000212130119175696031381351206433107381616544528330162391413714852100020002918629236292302928129278
6300429225219141400214676288350017217300620061000200010005000238586226812909829318310300010002000100020002918729123116100110001000010003100003100120312938943368673080744206143111381313504728369161811408014933100020002925729258292412922029242
6300429190219141710314620286470117056300420041000200010005000239049227792901729223310300010002000100020002920729138116100110001000110003100000100020313102909768363194839205423048381611413728359159901410914864100020002920729234292002932329251
6300429250220161600314628287630017059300420041000200010005000239034227402898429195310300010002000100020002900629122116100110001000010003100004100101313059927068483094945207183055381616444228304163441396615041100020002919429178292442920729217
63004291922191616007147672867400170943004200410002000100050002388642271729094292393103000100020001000200029175291381161001100010000100031001011001313129479508692431631249206383100381415494628364161231396014923100020002927829297293052931429217
6300429310219131000214613288620017162300420061000200010005000239005227242908429284310300010002000100020002917029096116100110001000010002100101100021313104932368183128747205483190382214414328425163271397714977100020002925529302293302935129348
6300429384220192300014643288131117306300620061000200010005000238644226792899029333310300010002000100020002917129147116100110001000110003100000100031212897941368383206839205943093381810464628330163411392914677100020002927829247293342927929222
630042926221916210071476028698001715730042006100020001000500023896222683290282926931030001000200010002000291442908411610011000100011000310010010012131291792386929306384720609316738205424628335160761379615083100020002934629230292422938529270
6300429270219151500204714287741017177300020061000200010005000238887227242911629287310300010002000100020002908029118116100110001000010003100001100021313222941768743077844205953216381712454128481162291401314904100020002932829299293692928729302

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.8b, v1.8b }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7020514005310491001100201014003513953413932512935625801034010030003100003010030000100001264342669338820081263014002301400501400501305580313111870100302001000030000602001000030000140050140053115020110099100401001000010000010010000011000001001000010003210031534313954440000669100002000040100140051140051140051140051140036
7020414003510490000000001014003513940413934312934125801004010030003100003010030000100001264342669353520081263014002301400471400471305310313111870100302001000030000602001000030000140050140128115020110099100401001000010000010010000011000000001000011003210031283313954440000060100002000040100140036140051140051140051140036
7020414005010490000000600014002013940413932512935325801034010030003100003010030000100001264342669279120081263014002301400501400501305580313112670100302001000030000602001000030000140047140053115020110099100401001000010000010010000001000001401000011003238021403413954440000066100002000040100140051140048140051140051140051
7020414005010490000000100014003513940413934312935625801034010030003100003010030000100001264328669279120079451014002601400351400861305310313114670100302001000030000602001000030000140082140122115020110099100401001000010000010010000001000000001000011003210031283413956340000660100002000040100140051140036140036140051140051
70204140050104900000001000140021139534139338129341258010340100300031000030100301481009912643106698184201027941140029014005014005113060702013124570784302001000030000602001000030000140035140105115020110099100401001000010000010010000011000001001000001003210021283313960340000909100002000040100140036140051140048140048140160
70204140050104900010001200014014913943113934312934125801034010030003100003010030000100001264328669279120081263114002601400381400521305580313114670100302001000030000602001000030000140035140053115020110099100401001000010000010010000001000000001000011003210041533413954440000969100002000040100140051140036140048140036140048
7020414005010490000000001014005313940513932612935625801034010030000100003010030000100001264342669279120081263114002601400471400471305430313111870100302001000030000602001000030000140047140053115020110099100401001000010000010010000011000000001000010003210041534413956340000906100002000040100140057140051140054140071140036
7020414003510490000000100014003513953413934312934125801004010030003100003010030000100001264310669279120081263014002601400501400501305580313112670100302001000030000602001000030000140050140053115020110099100401001000010000010010000011000000001000001003210041333413954440000960100002000040100140036140036140051140051140048
7020414005010490000000100014003513943113932512935625801004010030003100003010030163100001264364669338820081263014002601400501400351305580313111870100302001000030000602001000030000140035140053115020110099100401001000010000010010000011000000001000011003210041283413955940000969100002000040100140036140048140048140048140049
7020414005010490000000001014002013943113934312935325801034010030000100003010030000100001264310669338820081263014001101400501400501305580313112670100302001000030000602001000030000140035140053115020110099100401001000010000010010001011000000001000011003210041284313954440000060100002000040100140051140036140051140048140051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
7002514005310490000000100001400351394461393431293562580013400103000310000300103000010000126474866935352008126300140026014005014005313056831311757001030020100003000060020100003000014005014004711500211091040010100001000001010000011000006100001101031400331213213957240000666100002000040010140051140051140051140048140051
7002414005010490000000100001400351394911393431293562580013400103000310000300103000010000126477166935352008126300140011014005014005013056831311757001030020100003000060020100003000014005014003511500211091040010100001000011010000011000000100001100031400541202213957240000966100002000040010140051140051140051140051140048
7002414003510490000000100001400351394911393381293562580013400103000310000300103000010000126477166935352008126300140026014004714005013056531311757037930020100003000060020100003000014005014004711500211091040010100001000001010000011000000100001100031405521212213957240000669100002000040010140051140051140058140051140048
70024140047104800010002301001400351394911393431293414680013400103000310000300103000010000126477166935352008126300140026014005014005013056531311757001030020100003000060020100003000014006314005011500211091040010100001000001010000011000000100001100031400521202213957240000969100002000040010140051140051140051140051140051
7002414005010490000000100001400321394911393431293562580013400103000010000300103000010000126477166933882008126315140026014004714005013056831311767001030020100003000060020100003000014004714004711500211091040010100001000001010000011000000100001100031405541212213957240000999100002000040010140036140036140051140051140051
7002414003510490001100100001400351394911393431293562580013400103000310000300103000010000126471966933882008126300140026014004714004713056531311757001030020100003000060020100003000014005014004711500211091040010100001000001010000001000000100001000031405531212213957240000999100002000040010140041140051140048140048140051
70024140050104900000001301001400361394951393431293562580013400103000010000300103000010000126478066935352009964415140023014005014003513062031312217001030020100003000060020100003000014005014043511500211091040010100001000001010000011000000100001100031400441542213957240000969100002000040010140048140051140051140051140051
7002414005010490000000100001400351394461393431293562580013400103000310000300103000010000126477166927912008126315140026014044314003513056831311757001030020100003000060020100003000014005014004711500211091040010100001000001010000011000003100051100032480541202313957240000609100002000040010140051140051140051140051140051
70024140053105100000001010014002413949113933812934115080013400103000310000300103000010000127563666935352008126315140237314005014004713056831311757001030020100003000060020100003000014003714044311500211091040010100001000001010000011000010100001100031400531202213956940000909100002000040010140036140051140051140038140036
7002414005010491000000100001400201394911393431293562580013400103000310000305703000010000126477166935352008126315140011014005114005013062431311757001030020100003000060020100003000014004714004711500211091040010100001000001010000011000000100001100031400421212213957240000909100002000040010140051140051140036140051140048

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.8b, v1.8b }, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0464

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
702051408641053111000110014044913984713932612976825801004010030003100003010030000100001268111671336620079595014044014046414003613095803131534701003020010000300006020010000300001404641400361150201100991004010010000100000100100000110000000100001010000321011331113997440000666100002000040100140465140465140465140037140038
702041404641052000000100014044913940513975312976825801034010030003100003010030000100001268111671336620141197114001214046414003613095803131534701003020010000300006020010000300001400361400361150201100991004010010000100000100100000110000000100001010001321011541113997440000660100002000040100140465140465140465140465140465
702041404641052000000700014044913984713975312934225801034010030003100003010030000100001264319671336620141197114044014046414003613095803131534701003020010000300006020010000300001404641404641150201100991004010010000100000100100000110000003100001010000321011331113997440000660100002000040100140037140465140465140465140465
702041404641052000000100014044913984713975312976825801034010030003100003010030000100001268111671336620141197114044014046414046413095803131534701003020010000300006020010000300001404641400361150201100991004010010000100000100100000110000000100001010001321011541113997440000066100002000040100140465140465140465140465140037
7020414046410490000001010014044913984713975312976825801034010030003100003010030000100001268111671336620141197114044014046414003613095803131534701003020010000300006020010000300001404641404641150201100991004010010000100000100100000110001000100000010000321011541113997440000060100002000040100140465140465140465140465140465
702041404641052000000110014002113984713932612976825801004010030003100003010030000100001268111671336620141197014044014046414046713053203131127701003020010000300006058010000300001404641404641150201100991004010010000100000100100000110000000100001000000321011541113997440000660100002000040100140465140465140037140465140465
702041400361053000000000014002113961813975312976825801004010030003100003010030000100001268922669283920141197114044014046514003613095803131534701003020010000300006020010000300001404641400361150201100991004010010000100001100100000010000104175100000010000321011541113954540000600100002000040100140037140465140465140465140465
702041404641049000000010014044913984713975312976825801034010030003100003010030000100001268111671336620141197114044014046414003613095803131534701003020010000300006020010000300001404641404641150201100991004010010000100000100100000010000000100001010000321011331113997440000666100002000040100140037140037140037140037140037
702041404641052000000110014002113984713975312976825801004010030003100023010030000100001266220669283920141197114044014003614046413095803131534701003020010000300006020010000300001404641404641150201100991004010010000100001100100000110000000100001010000321011541113997440000660100002000040100140465140465140465140465140465
702041404641052000000100014002113984713975312976825801004010030007100003010030000100001268111671336620079595114001314046414047313053203131128701003020010000300006020010000300001404641404641150201100991004010010000100000100100000110000000100001000000321011541113997440000666100002000040100140037140465140465140465140037

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0260

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
70025140515105302010003840529429921001142711139716139552129565679801544018330030100003043230590105471342360675431120282470114023614026014176513081321131384700103002010000300006002010000300001402941402601150021109104001010000100001101000001100000001000010100314061224213978140000101010100002000040010140261140261140261140261140261
700241402601050000000000100000140245139954139552129565888001340010300131000030010302951005012666626709760201207021140236140260140260130808313138470010300201000030000600201000030000140260140260115002110910400101000010000110100000110000100100001010031403120351397814000010010100002000040010140261140261140261140200140261
70024140260105000000000058010001402451397161395521293422580013400103000310000300103000010000126666267036132011193011402361402601402601307773131162700103002010000300006002010000300001402601402601150021109104001010000100000101000001100000031000010100314041224313978140000101010100002000040010140261140261140355140265140037
7002414026010510000000001010001402451397161395521295652580013400103000310000300103000010000126666267036132011193011402361402601403101311973131806700103002010000300006002010000300001406821406821150021109104001010000100000101000311100020211000011110314021222414020140000101010100002000040010140261140261140037140261140261
700241400361051000000000101000140245139488139552129565258001340010300031000030010300001000012666626703613201119301140012140260140309130778313116270010300201000030000600201000030000140260140260215002110910400101000010000010100000110000700100001010031405122421397814000010100100002000040010140261140261140181140261140261
7002414026010500000000001010001402451397161395521295652580013400103000310000300103000010000126472867036132011355011402361402601400361305543131384700103002010000300006002010000300001402601402601150021109104001010000100000101000000100000001000010100314051225513978140000101010100002000040010140261140261140261140261140261
7002414026010510000000001010001402451397161395521295652580013400103000310000300103000010000126666267036132011193011402361402601402601307773131384700103002010000300006002010000300001402601402601150021109104001010000100000101000001100000001000010112314041225514020140000101010100002000040010140683140683140683140683140686
700241406821054101011000201001140667139826139972129985258001640010300061000030010300001000012704656723821201725741140658140683140682131197313180670010300201000030000600201000030000140682140682115002110910400101000010000010100011010001001100001111131404462413978140000101010100002000040010140261140261140261140261140261
700241402601051000000000812010011406671402351397471299852580016400103000610000300103000010000127046567238212017257411406581402601402601305543131162700103002010000300006002010000300001400361402601150021109104001010000100000101000001100000031000010100314031225513978140000101010100002000040010140261140261140261140261140261
700241402601049000000000101000140245139716139552129565258001340010300031000030010300001000012666626703613201119301140236140260140260130777313138470010300201000030000600201000030000140260140260115002110910400101000010000010100000110000000100001010031403122421397814000001010100002000040010140261140261140261140261140261

Test 4: throughput

Count: 8

Code:

  ld2 { v0.8b, v1.8b }, [x6]
  ld2 { v0.8b, v1.8b }, [x6]
  ld2 { v0.8b, v1.8b }, [x6]
  ld2 { v0.8b, v1.8b }, [x6]
  ld2 { v0.8b, v1.8b }, [x6]
  ld2 { v0.8b, v1.8b }, [x6]
  ld2 { v0.8b, v1.8b }, [x6]
  ld2 { v0.8b, v1.8b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5009

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
240205400623000001000681034003209902524019110016002480008100160020800145004157225440882040050400694006999800610018240134200800141600322008001416002640070400691180201100991100100800008000011008001616458001610061800426131451601115116116114006613132800001600001004007040070400704007040070
240204400693001110000671034005420052524019110016008280008100160020800145004192705440882040050400694006999800610018240134200800141600262008001416002640069400691180201100991100100800008000001008001617458005810162800420157451601115116116114006613130800001600001004007040070400704007040048
240204400473001110000200034005529902524013210016008280008100160020800145004180935440882040028400694006999800610018240134200800141600262008001416002640047400691180201100991100100800008000001008001817458005800161800426157451611115116116114006613130800001600001004007040070400704007040070
24020440047300101000068102400543090252401911001600928000810016002080014500419247272016804005040069400479980069996240134200800141600262008001416002640047400471180201100991100100800008000001008001618458005800162800446057451601115116116114006613130800001600001004007040070400484007040070
240204400692991100030681024003229952524019210016008480000100160000800005004157465440750040286400694006999780310027240100200800001600002008000016000040047400691180201100991100100800008000001008001616458005811062800426158451620005110216224006613131800001600001004007040070400704007040070
24020440069301110000068103400543995252401801001600848000010016000080000500415756544075004005040069400699978031002724010020080000160000200800001600004006940047118020110099110010080000800000100800171645800161006280042605745160000511021622400661300800001600001004007040048400704007040048
240204401643001100000671034005427952524018410016008280000100160000800005004191835440750040051400474006999780310027240100200800001600002008000016000040069400691180201100991100100800008000001008001616458001600061800426158451600005110216224006613132800001600001004007040070400704007040048
2402044006930010100006810240054399525240184100160090800001001600008000050041920427200340400504004740069997803100052401002008000016000020080000160000400694006911802011009911001008000080000010080017184580016100618004260580160000511021622400661302800001600001004007040048400704007040070
240204400693001000000680024005439952524018410016009080000100160000800005004199895440750040050400694006999780310027240100200800001600002008000016000040069400691180201100991100100800008000001008001617458005810162800006158431620005110216224004413130800001600001004007040048400704007040070
24020440069300100000073001400542090252401841001600908000010016000080000500419204544075004005040069400699978031002724010020080000160000200800001600004006940069118020110099110010080000800001100800161645800590016280042615745161000511021622400660130800001600001004007040070400704007040048

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
240025400552990001030101400401010025240080101600708000010160000800005041874539200140400364005940059999631003524001020800001600002080000160000400594005511800211091010800008000011080000008002400024800316124375020416324005210680000160000104005640060400604006040060
240024400553000001037100400441110025240080101600708000010160000800005041873539200141400364005540055999631003524001020800001600002080000160000400594005511800211091010800008000001080000030800310000800246124375020416424005661080000160000104006040060400604006040060
240024400593000001030100400411010025240064101600548000010160000800005041851839200141400364005940059999631003924001020800001600002080000160000400554005511800211091010800008000001080000030800240002480024612430502021634400526680000160000104005640060400564005640056
2400244005529900010301004004010110252400801016005480000101600008000050418745392001404003640055400559996310035240010208000016000020800001600004005540055118002210910108000080000010800000308000000024800246124375020316314005261080000160000104005640060400604006040060
2400244005930000010301014004401002524008010160070800001016000080000504190214560014040036400554005999963100352400102080000160000208000016000040059400551180021109101080000800000108000003080031010348003160243050204163140052101080000160000104006040042400604006040060
24002440059300000100100400440100252400801016007080000101600008000050418518456001404002340059400599996310035240010208000016000020800001600004005540055118002110910108000080000010800000308003100031800246124305020216244005210680000160000104006040060400604006040060
2400244005930000010300014004011110252400801016007080000101600008000050419009392001404003640059400599996310035240010208000016000020800001600004005940055118002110910108000080000010800000308003100031800316124305020416424005610680000160000104005640060400604004240056
2400244005930000010371004004010100252400801016007080000101600008000050418745392001414003640055400599996310035240010208000016000020800001600004005940055118002110910108000080000010800000308000000031800310124305020316314005210680000160000104005640042400564006040056
2400244005530000010301014004410102625240064101600708000010160000800005041900945600140400364005940059999631003924001020800001600002080000160000400414005511800211091010800008000001080000030800310003180031612430502031652400526080000160000104005640056400564005640056
2400244005530000010371014004410100252400801016007080000101600008000050419009392001404004040055400559996310039240010208000016000020800001600004005940041118002110910108000080000010801240308002400031800246124305020116134005210680000160000104006040060400424006040060