Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDUR (D)

Test 1: uops

Code:

  ldur d0, [x6, #1]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100540331111016710338837719251000100010001552437740340222532571000100010004034031110010100010001102020431062116010396158431917311611400131351000404404404404404
100440231010006700238837719251000100010001552637840240322532611000100010004034031110010100010000101919431059106110396159431917311611400131351000404404407404403
100440331111006700338817719251000100010001545937840340322532611000100010004034031110010100010000102019431059016110276158431917311611400131351000404404404403404
100440321101006600238827719251000100010001550637940340522532611000100010004034031110010100010000102019431058106110396159431927311611400131351000404404404403404
100440231111006700338837720251000100010001552637840240322532611000100010004034021110010100010000101919431058106010396159431927311611400131351000407403404415404
100440331101007010338937720251000100010001555540140540322532601000100010004034031110010100010000102019431058106010406160431907311611400131351000404403404404404
100440331111006600338827718251000100010001556737940340222532611000100010004024031110010100010000102020431059006010406158431927311611400131351000403405404404404
100440331100006600238827720251000100010001555537840340322632601000100010004034021110010100010000101920431058006110406159431907311611400131351000404404404404404
100440331111006700238737719251000100010001555537840340325432611000100010004034021110010100010000101919431058006110406159431917311611400131351000404404404404404
100440331001006600239327718251000100010001555537840340322532621000100010004034031110010100010000102019431059006010396159451907311611400131351000404404423404403

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldur d0, [x6, #1]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0056

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
502051200478991111100200001200381195151094662560106401041000210000301001000010000107924257368126136676012003212009112005611314831136735010030200100001000060200100001000012005912005311502011009910040100100001000001001000140100030121100001111103210113511119660400049681000040100120042120057120057120057120042
502041200538991100100201001200411195151094552560106401041000210000301001000010000107902657357506136419012003212005612005611313731136305010030200100001000060200100001000012005312005311502011009910040100100001000001001000120100020214100001111003210110711119663400029981000040100120050120057120057120063120056
502041200539001010001700011200261195131094552560106401021000210000301001000010000107905357363326136572012003212005612005611315131136735010030200100001000060200100001000012005612005311502011009910040100100001000001001000211100030011100001101003210010711119663400049681000040100120057120042120057120057120055
502041200538991000000100011200381195151094692560103401041000110000301001000010000107902657364766136572012001712004112005311315131136705010030200100001000060200100001000012005612005311502011009910040100100001000001001000210100030204100000111103210113511119663400020681000040100120057120057120057120057120043
502041201168991111100200001200411195151094692560106401041000210000301001000010000107905357364766136419012002912005612005311314831136305010030200100001000060200100001000012005612004111502011009910040100100001000001001000211100020301100001101103210113511119663400046081000040100120057120057120057120042120058
502041200418991110000100001200381195351094692560106401041000110000301001006210000107905357364766133458012003212005312005311315131136735010030200100001000060200100001000012005612004111502011009910040100100001000001001000111100040001100001010003210110711119646400020001000040100120051120036120051120053120053
502041200508990000100101001200351195091094492560106401041000110000301001000010000107902657364766136572012003212005612004111315131136735010030200100001000060200100001000012004112004111502011009910040100100001000001001000211100030001100001111003210113511119663400049981000040100120042120057120057120042120237
502051200569001000000200001200411195151094692560103401041000210000301001000010000107909757364766136572012003212005612005611315131136305010030200100001000060200100001000012005612004111502011009910040100100001000001001000111100030421100001111103210113511119660400049681000040100120057120057120042120042120056
5020412005689910000002000012004111951510945525601064012810002100003010010000100001085906573647661364190120029120056120041113728311367350100302001000010000602001000010000120061120064215020110099100401001000010000010010002211000204901100001101003210110711119663400049951000040100120054120042120042120099120065
502041200568991100000200001200411195151094662560106401041000110000301001000010000107905357364766136572012003212005612005611313731136735010030200100001000060200100001000012005612005311502011009910040100100001000011001000221100020401100000111003210113511119663400026901000040100120057120057120054120057120067

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0049

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 24 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
500251200508990000106101012003511950810946425600134001210001100003001010000100001079548573618861327571120014120047120039113168311367450010300201000010000600201000010000120050120047115002110910400101000010000101000001100000121000001003140310723119662400009681000040010120051120051120051120051120051
50024120051899010000511108001200351195081094632560013400121000110000300101000010000107951757361886133499012002612004712005011316831136895001030020100001000060020100001000012005012004711500211091040010100001000010100000110000001000011003140310732119665400026081000040010120051120051120051120036120051
5002412004790000000010101200201195081094492560013400121000110000300101000010000107951757360446132757012002612005012005011316831136905001030020100001000060340100001000012003512004711500211091040010100001000010100000010006171761000211003140210724119841400199981000040010120036120051120051120036120048
5002412004789901011013000120035119492109463256001340012100011000030010100001000010795575736188613349901200261200521200501131683113689500103002010000100006002010000100001200501200471150021109104001010000100001010000011000001471000011003140310734119968400009981000040010120036120052120051120048120048
500241200508990100001301012002011951010944925600134001210001100003001010000100001079517573545561334991120026120050120050113168311368950010300201000010000600201000010000120050120047115002110910400101000010000101000021100000301000000003140210732119665400020981000040010120051120092120036120051120099
50024120047899000010901012002011950810946325600104001210001100003001010000100001079548573618861334990120026120050120035113153311368950010300201000010000600201000010000120035120047115002110910400101000010000101000001100000121000011003140310733119665400009081000040010120036120048120052120048120051
5002412003590000011017500012003511950810946325600134001010000100003001010000100001079548573618861334990120026120050120035113153311368650010300201000010000600201000010000120050120047115002110910400101000010000101000001100000121000001003140310732119650400029001000040010120048120051120048120057120048
50024120048899000000100012002011950810946125600134001010002100003001010000100001079517573604461334991120011120050120035113153311367450010300201000010000600201000010000120047120047115002110910400101000010000101000000100000121000010003140310723119650400119081000040010120036120036120051120051120051
50024120047899000000000012003511950810944925600134001210000100003001010000100001079548573618861327570120026120035120035113168311368950010300201000010000600201000010000120050120047115002110910400101000010000101000001100000121000001003140310723119650400029681000040010120036120036120051120051120051
5002412005489900000010101200201194921094612560010400121000010000300101000010000107954857361886132757112002612005012003511316831136745001030020100001000060020100001000012005012004711500211091040010100001000010100000110000091000011003140210733119777400006681000040010120051120051120051120051120051

Test 3: throughput

Count: 8

Code:

  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526737200101110001291032669937723258010010080000100800005001169868026716269292704316664316695801002008000020080000267362673611802011009901001008000080000010080020194308005911206480000615943191513241644267330135800001002673726738267372695826920
80204267432001101111167103267210704258010010080000100800005001167815026689267362673616658316694801002008000020080000267402671411802011009901001008000080000010080019190080059032195480040615943191511042583267111305800001002673726738267372673726783
80204267242001011000067001267210702225801001008000010080000500116744602671126740267371663731667580100200800002008000026736267371180201100990100100800008000001008001919008005916317080040005943192511031634267331305800001002673826844267972687126754
802042674020010011000761042669927020258010010080000100800005001167514026689267362673616663316695801002008000020080000267362671411802011009901001008000080000010080019190080058220906801706059431915110516352687013130800001002690226822270272713027352
80204267452011000100079103267210771925801001008000010080000500116776802671126737267141665931667280100200800002008000026737267361180201100990100100800008000001008002219008005913163800406019019151104166626733005800001002674126983267402672426742
802042673620010101000671032672137702580100100800001008000050011675550267112671426736166373166948010020080000200800002674026736118020110099010010080000800001100800201943080059151798003900594319151103164426711005800001002674126737267372671526769
80204267492001010100067003267223701258010010080000100800005001166758026711267362673616658316695801002008000020080000267362673611802011009901001008000080000010080149194398804491406180000015943190511041634267401305800001002673727002267532683026737
80204267142001011100021003267213072325801001008000010080000500116776402671126736267361665831667280100200800002008000026736267441180201100990100100800008000001008002019008001900263800406159431915110416442671113135800001002671526928267392684626716
802042673620010100000670032672137710225801001008013010080000500116799512671126736267361665931667280100200800002008000026736267361180201100990100100800008000001008002019008005910221800396019431915110416432673313130800001002671626738268682681726741
80204267142001010100021013267213077225801001008000010080000500117243702668926737267141665931667280100200800002008000026737267361180201100990100100800008000011008001919008005900161800406159431915110416442673313135800001002673726737267722682826737

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025267222001000411012670721818142580010108000010800005011680001266970267222672216667316705800102080000208000026722267221180021109101080000800000108000003980035260800356135050200311628272671966280000102672326723267232672326732
800242674920000004110126707218181225800101080000108000050116760512669702672226722166523166888001020800002080000267222672211800211091010800008000011080000039800350358003561353950200261615262672566280000102672326723267232670926727
80024267272000000411012671221801225800101080000108000050116760512669702672726722166523167028001020800002080000267222672211800211091010800008000001080000039800352398000061354350200261616242672566280000102672326723267232670926737
80024267262000000501002670721218025800101080000108000050116654512669702672226722167883167028001020800002080000267272672211800211091010800008000001080000008003534180035613539502002516272626724610280000102672326723267232670926734
8002426722200000001012670701818122580010108000010800005011676051266970267082672216667316702800102080000208000026722267221180021109101080000800000108000003980035538003561353950200231617262671906280000102670926723267232670926728
80024267221990000410002670721801225800101080000108000050116760512669702672226722166673167028001020800002080000267302672611800211091010800008000001080000039800352358003501353950200271627282671966280000102672326723267092672326732
8002426722200000041101267072181811258001010800001080000501172240126697026722267221666731670280010208000020800002672226722118002110910108000080000010800000398003533580035603543502002816262626705106280000102672326729267232672326728
800242672220000004100126693218181225800101080000108000050116760512669702672226727166673167028001020800002080000267222670811800211091010800008000001080000039800352358003561353950200281617292671966480000102672326709267232672326733
80024267282010000411012670721818162580010108000010800005011676051266970267222672716672316702800102080000208000026722267081180021109101080000800000108000000800001478003561353950200161615262671966280000102672326723267232672326823
80024267272000010411012670721801125800101080000108000050117224012679102672226722166673166888001020800002080000267222672211800211091010800008000001080000039800351358003561353950200141626172671966280000102672326709267232672326737