Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDUR (Q)

Test 1: uops

Code:

  ldur q0, [x6, #1]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005394310450013592012162510001000100015133034939437421732321000100010003943941110011000100001000010391039103961043731161137101041000395395395398376
100437430044101383211212510001000100015267137340139822132561000100010003743941110011000100001000431000000103801390731161139514071000399395375399395
1004398300451013592112162510001000100015267037337439822232321000100010003983741110011000100001000010390001038613907311611395141471000399375395399399
1004374300440013590011925100010001000152671349398398197325210001000100039839411100110001000010000103800381038004007311611371141441000375395375399399
1004398300441003830011925100010001000140600373398398221325610001000100039839411100110001000010004310000038103860044731161139101401000375399399399375
10043983000101383000162510001000100015274137339839822132561000100010003943941110011000100001000431038003810386139447311611371141471000375395395375399
1004394300441013832011925100010001000153380373398398198325610001000100039839411100110001000010004410390038103860394473116113950001000399395399375375
1004398300440013832110251000100010001406003493943941973256100010001000398394111001100010000100001038200103860044731161139501071000399395395375375
10043743000100383210025100010001000152740349374398197323210001000100037439411100110001000010004310380038103900390731161139101041000399395395399399
1004378300440013592001925100010001000140600373394398221323210001000100039839411100110001000010004310380038100001043731161139514071000399395375375410

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldur q0, [x6, #1]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50205120053899111100593520001200301195151096032560103401021000210000301001000010000107905357463156136419012003201200561200411131510311367350100302001000010000602001000010000120041120053115020110099100401001000010000010010002211000200011000011011003210510723119663400040081000040100120057120042120057120058120057
50204120041899100000200001200421195131094662560106401021000210000301001000010000107902657363326133458012003201200561200561131480311367050100302001000010000602001000010000120056120053115020110099100401001000010000110010001101000200011000011111003210413533119660400049601000040100120042120057120057120057120057
50204120056899120004700001200351195091094612560103401351000010000301001000010000107899957361886136166012002601200351200501131410311366750100302001000010000602001000010000120050120047115020110099100401001000010000010010000011000000031000410000003210310133119657400046081000040100120057120042120054120042120432
50204120041899110000101001200321194931094632560100401021000110000301001000010000107899957361886136166012001101200501200351131450311366750100302001000010000602001000010000120050120047115020110099100401001000010000010010002211000100111000011111003210313533119654400049951000040100120054120057120054120042120054
50204120053899110000200001200201194931094612560103401001000110000301001000010000107899957361886133818012002601200501200501131450311365850100302001000010000602001000010000120050120047115020110099100401001000010000010010002211000200111000011011003210310733119663400020951000040100120051120051120036120051120051
50204120050899000000101001200381195151094692560106401041000210000301001000010000107905357364766136419012003201200561200561131510311367350100302001000010000602001000010000120056120041115020110099100401001000010000010010002211000300011000011110003210310733119663400029681000040100120057120057120054120057120057
50204120056899100000200001200261195151094552560106401041000110000301001000010000107909757364766133458012003201200561200531131510311367350100302001000010000602001000010000120056120053115020110099100401001000010000010010000011000000031000010000103210310133119657400006651000040100120051120051120051120048120036
50204120050899000000100001200351194931094492560100401021000010000301001000010000107899957354556134461012002601200501200501131450311365850100302001000010000602001000010000120050120047115020110099100401001000010000010010000011000000001000000000003210310733119657400026001000040100120051120036120051120051120051
50204120050899000000000001200321195091094612560103401001000110000301001000010000107920057354556133818012002601200501200471131450311363851071302001000010000602001000010000120037120048415020110099100401001000010000010010000011000001701000100102003237410733119660400020981000040100120036120037120038120054120039
502041200369010110101800001200361195191094612560103401021000110008301001000010000107920057360446133818012001101200471200531131450311363650100302001000010000602001000010000120050120035115020110099100401001000010000010010000011000000001000010104003210412463119657400020651000040100120048120052120051120051120054

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0061

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 23 | 3f | 43 | 49 | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
500251200618991101103010120046101195191094742560019400161000310000300101000010000107964757361406134172112003712006112004911317931137005001030020100001000060020100001000012006112004911500211091040010100001000001010002221000301210001120113141210722119676400060651000040010120050120062120106120062120062
500241200618991011103010120046111195071094742560019400141000210000300101000010000107964757361406133560112003712004912006111317931137005001030020100001000060020100001000012006112004911500211091040010100001000001010002221000301510001121113141310722119676400066601000040010120062120050120062120050120062
500241200498991101018010120046111195191094742560019400161000310000300101000010000107964757367166133560112002512006112006111317931136885001030020100001000060020100001000012006112004911500211091040010100001000001010003221000201210001121113141210722119676400066601000040010120050120062120062120062120050
5002412006189910010030001200461111951910946225600194001410003100003001010000100001079647573671661341721120025120049120061113179311368950010300201000010000600201000010000120061120061115002110910400101000010000010100022210004551210001121113141210722119664400066651000040010120062120062120062120050120050
500241200498991111008000120046101195651094622560019400161000210000300101000010000107954157367166134172112002512006112006111317931137005001030020100001000060020100001000012006112006111500211091040010100001000001010002301000300210001121103141210722119664400060651000040010120062120062120050120050120062
500241200498991111002000120046111195191094622560019400141000310000300101000010000107954157361406134172112003712006112006111317931136885001030020100001000060020100001000012004912004911500211091040010100001000001010003301000201210001121113141210722119676400066601000040010120050120050120062120050120050
5002412006189910000030001203941111951910976125600194001410003100043001010000100001079541575119761341721120449120061120498113167311368850496300201000010000600201000010000120061120062115002110910400101000010000110100012210003112100011211231412107281196764001262351000040010120052120062120050120062120062
500241200619001000003000120046111195191094622560019400141000310000300101000010000107964757367166134172112003712004912006111317931136885001030020100001000060020100001000012006112006111500211091040010100001000001010001121000300210001021113141210722119676400060651000040010120062120062120062120062120062
50024120049899100110800012004610119507109474256001640016100031000030010100001000010795505736716613417201200371200491200491133043113688500103002010000100006002010000100001200611200611150021109104001010000100000101000122100020121000112111314110107221196764000602301000040010120062120112120050120062120062
500241200968991011002010120080111195191094742560019400161000310000300101000010000107964757361406134172112002512006112006111316731137005001030020100001000060020100001000012006112006111500211091040010100001000011010001121000201210001121103141210722119676400066651000040010120062120062120062120062120062

Test 3: throughput

Count: 8

Code:

  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526728200000002267072001825801001008000011280000500116652512669726727267271665031666580100200800002008000026732268351180201100991001008000080000110080000080039003980000600435110116112671901062800001002672826708267282670826728
802042670720000450022671220121125801001008000010080000500116723112670226727267071664531668580100200800002008000026727267221180201100991001008000080000110080000398003500080039613505110116112672301064800001002672826728267282670826708
802042670720000450002671201212162580100100800001008000050011665251267022672726727166503166858010020080000200800002672726722118020110099100100800008000001008000039800001035800000035395110116112670401064800001002670826708267282672826708
802042670720011000226692212016258010010080000100800005001167808126702267072672716630316685801002008000020080000267272672211802011009910010080000800000100800000800000039800396135435110316112672406162800001002671326723267232670826728
80204267072000057000267122121216258010010080000100800005001165789126702267272670716650316685801002008000020080000267272670711802011009910010080000800000100800003980035004280039013905110116112672401000800001002672826728267282672326708
80204267282000045102267122181202580100100800001008000050011770381266822672726727166503166858010020080000200800002673726722118020110099100100800008000001008000039800000039800396139435110116112672400104800001002672826708267082672826728
802042672720000450022669221212162580100100800001008000050011665251267022672726727166503166808010020080000200800002672726722118020110099100100800008000001008000043800350039800006035435110116112670400100800001002672826723267082672826708
80204267222000000002671221212122580100100800001008000050011665251267022670726722166503166658010020080000200800002671326722118020110099100100800008000001008000043800390039800396035435110116112672401004800001002672826708267082670826723
8020426727200000102267120120122580100100800001008000050011672311267022672726707166303166658010020080000200800002670726722118020110099100100800008000001008000039800390039800390139435110516112672701000800001002688126884267282672826708
80204267072001142002266922120162580100100800001008000050011672311266822670726727166303166858010020080000200800002672726722118020110099100100800008000001008000008003500398003901350511011611267190064800001002672826708267282672826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800252673620011100670012672230020258001010800001080000501167791126690026736267361668231671680010208000020800002673626736118002110901010800008000011080020194380059200218003961604319100502041666267331313580000102673826737267372673826738
800242673620011100671032672237718258001010800001080000501167793026712326744267451668131671680010208000020800002674126736118002110901010800008000001080020204380058000608004061194319000502081643267341313580000102673826737267162671626737
80024267372001010066103267003700258001010800001080000501168286026711026737267371668131671680010208000020800002673626736118002110901010800008000001080021214380059001618000061584319100502041687267341313580000102673726715267372673726715
80024267152001000021103267223702025800101080000108000050116828612671102673626736166813167168001020800002080000267362673611800211090101080000800000108001919080059110218004001584319100502091643267341313580000102673726738267382673726737
800242673720011000661012671601121625800101080000108000050116689612670302672826728166723166888001020800002080000267272672711800211090101080000800000108000004380039000080039010430020502041644267241010080000102670926728267092672826729
80024267312000000045101267160111625800101080000108000050116884312670202670826731166723167078001020800002080000267312672811800211090101080000800000108000004380039000398003961043000050207166726734130580000102673826738267372673726737
8002426736200101106910326722377182580010108000010800005011694511266890267362673616681316694800102080000208000026736267361180021109010108000080000010800212043800591026180000005901900050204164726733130580000102673726738267152673826716
80024267362001011167103267003072025800101080000108000050116779112671202671426737166823167178001020800002080000267362673611800211090101080000800001108002020438005910161800006159431900050207167826734130080000102673726737267382673726737
800242671420011000660032672437720258001010800001080000501166960126712326888267371665931671680010208000020800002671526736118002110901010800008000011080021214380059000608004061594319000502041644267331313580000102673726738267372673726738
80024267362001110021103267213772025800101080000108000050116828612669002673726715166813166958001020800002080000267362673611800211090101080000800000108001920438006010061800406158431920050204163426733130580000102673826737267372671626737