Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDUR (D)

Test 1: uops

Code:

  ldur d0, [x6, #1]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | 92 | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
100540331111016710338837719251000100010001552437740340222532571000100010004034031110010100010001102020431062116010396158431917311611400131351000404404404404404
100440231010006700238837719251000100010001552637840240322532611000100010004034031110010100010000101919431059106110396159431917311611400131351000404404407404403
100440331111006700338817719251000100010001545937840340322532611000100010004034031110010100010000102019431059016110276158431917311611400131351000404404404403404
100440321101006600238827719251000100010001550637940340522532611000100010004034031110010100010000102019431058106110396159431927311611400131351000404404404403404
100440231111006700338837720251000100010001552637840240322532611000100010004034021110010100010000101919431058106010396159431927311611400131351000407403404415404
100440331101007010338937720251000100010001555540140540322532601000100010004034031110010100010000102019431058106010406160431907311611400131351000404403404404404
100440331111006600338827718251000100010001556737940340222532611000100010004024031110010100010000102020431059006010406158431927311611400131351000403405404404404
100440331100006600238827720251000100010001555537840340322632601000100010004034021110010100010000101920431058006110406159431907311611400131351000404404404404404
100440331111006700238737719251000100010001555537840340325432611000100010004034021110010100010000101919431058006110406159431917311611400131351000404404404404404
100440331001006600239327718251000100010001555537840340322532621000100010004034031110010100010000102019431059006010396159451907311611400131351000404404423404403

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldur d0, [x6, #1]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0056

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
502051200478991111100200001200381195151094662560106401041000210000301001000010000107924257368126136676012003212009112005611314831136735010030200100001000060200100001000012005912005311502011009910040100100001000001001000140100030121100001111103210113511119660400049681000040100120042120057120057120057120042
502041200538991100100201001200411195151094552560106401041000210000301001000010000107902657357506136419012003212005612005611313731136305010030200100001000060200100001000012005312005311502011009910040100100001000001001000120100020214100001111003210110711119663400029981000040100120050120057120057120063120056
502041200539001010001700011200261195131094552560106401021000210000301001000010000107905357363326136572012003212005612005611315131136735010030200100001000060200100001000012005612005311502011009910040100100001000001001000211100030011100001101003210010711119663400049681000040100120057120042120057120057120055
502041200538991000000100011200381195151094692560103401041000110000301001000010000107902657364766136572012001712004112005311315131136705010030200100001000060200100001000012005612005311502011009910040100100001000001001000210100030204100000111103210113511119663400020681000040100120057120057120057120057120043
502041201168991111100200001200411195151094692560106401041000210000301001000010000107905357364766136419012002912005612005311314831136305010030200100001000060200100001000012005612004111502011009910040100100001000001001000211100020301100001101103210113511119663400046081000040100120057120057120057120042120058
502041200418991110000100001200381195351094692560106401041000110000301001006210000107905357364766133458012003212005312005311315131136735010030200100001000060200100001000012005612004111502011009910040100100001000001001000111100040001100001010003210110711119646400020001000040100120051120036120051120053120053
502041200508990000100101001200351195091094492560106401041000110000301001000010000107902657364766136572012003212005612004111315131136735010030200100001000060200100001000012004112004111502011009910040100100001000001001000211100030001100001111003210113511119663400049981000040100120042120057120057120042120237
502051200569001000000200001200411195151094692560103401041000210000301001000010000107909757364766136572012003212005612005611315131136305010030200100001000060200100001000012005612004111502011009910040100100001000001001000111100030421100001111103210113511119660400049681000040100120057120057120042120042120056
5020412005689910000002000012004111951510945525601064012810002100003010010000100001085906573647661364190120029120056120041113728311367350100302001000010000602001000010000120061120064215020110099100401001000010000010010002211000204901100001101003210110711119663400049951000040100120054120042120042120099120065
502041200568991100000200001200411195151094662560106401041000110000301001000010000107905357364766136572012003212005612005611313731136735010030200100001000060200100001000012005612005311502011009910040100100001000011001000221100020401100000111003210113511119663400026901000040100120057120057120054120057120067

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0049

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 0e | 0f | 19 | 1e | 1f | 22 | 24 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | dcache load miss (bf) | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
500251200508990000106101012003511950810946425600134001210001100003001010000100001079548573618861327571120014120047120039113168311367450010300201000010000600201000010000120050120047115002110910400101000010000101000001100000121000001003140310723119662400009681000040010120051120051120051120051120051
50024120051899010000511108001200351195081094632560013400121000110000300101000010000107951757361886133499012002612004712005011316831136895001030020100001000060020100001000012005012004711500211091040010100001000010100000110000001000011003140310732119665400026081000040010120051120051120051120036120051
5002412004790000000010101200201195081094492560013400121000110000300101000010000107951757360446132757012002612005012005011316831136905001030020100001000060340100001000012003512004711500211091040010100001000010100000010006171761000211003140210724119841400199981000040010120036120051120051120036120048
5002412004789901011013000120035119492109463256001340012100011000030010100001000010795575736188613349901200261200521200501131683113689500103002010000100006002010000100001200501200471150021109104001010000100001010000011000001471000011003140310734119968400009981000040010120036120052120051120048120048
500241200508990100001301012002011951010944925600134001210001100003001010000100001079517573545561334991120026120050120050113168311368950010300201000010000600201000010000120050120047115002110910400101000010000101000021100000301000000003140210732119665400020981000040010120051120092120036120051120099
50024120047899000010901012002011950810946325600104001210001100003001010000100001079548573618861334990120026120050120035113153311368950010300201000010000600201000010000120035120047115002110910400101000010000101000001100000121000011003140310733119665400009081000040010120036120048120052120048120051
5002412003590000011017500012003511950810946325600134001010000100003001010000100001079548573618861334990120026120050120035113153311368650010300201000010000600201000010000120050120047115002110910400101000010000101000001100000121000001003140310732119650400029001000040010120048120051120048120057120048
50024120048899000000100012002011950810946125600134001010002100003001010000100001079517573604461334991120011120050120035113153311367450010300201000010000600201000010000120047120047115002110910400101000010000101000000100000121000010003140310723119650400119081000040010120036120036120051120051120051
50024120047899000000000012003511950810944925600134001210000100003001010000100001079548573618861327570120026120035120035113168311368950010300201000010000600201000010000120050120047115002110910400101000010000101000001100000121000001003140310723119650400029681000040010120036120036120051120051120051
5002412005489900000010101200201194921094612560010400121000010000300101000010000107954857361886132757112002612005012003511316831136745001030020100001000060020100001000012005012004711500211091040010100001000010100000110000091000011003140210733119777400006681000040010120051120051120051120051120051

Test 3: throughput

Count: 8

Code:

  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  ldur d0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0b | 0e | 0f | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020526737200101110001291032669937723258010010080000100800005001169868026716269292704316664316695801002008000020080000267362673611802011009901001008000080000010080020194308005911206480000615943191513241644267330135800001002673726738267372695826920
80204267432001101111167103267210704258010010080000100800005001167815026689267362673616658316694801002008000020080000267402671411802011009901001008000080000010080019190080059032195480040615943191511042583267111305800001002673726738267372673726783
80204267242001011000067001267210702225801001008000010080000500116744602671126740267371663731667580100200800002008000026736267371180201100990100100800008000001008001919008005916317080040005943192511031634267331305800001002673826844267972687126754
802042674020010011000761042669927020258010010080000100800005001167514026689267362673616663316695801002008000020080000267362671411802011009901001008000080000010080019190080058220906801706059431915110516352687013130800001002690226822270272713027352
80204267452011000100079103267210771925801001008000010080000500116776802671126737267141665931667280100200800002008000026737267361180201100990100100800008000001008002219008005913163800406019019151104166626733005800001002674126983267402672426742
802042673620010101000671032672137702580100100800001008000050011675550267112671426736166373166948010020080000200800002674026736118020110099010010080000800001100800201943080059151798003900594319151103164426711005800001002674126737267372671526769
80204267492001010100067003267223701258010010080000100800005001166758026711267362673616658316695801002008000020080000267362673611802011009901001008000080000010080149194398804491406180000015943190511041634267401305800001002673727002267532683026737
80204267142001011100021003267213072325801001008000010080000500116776402671126736267361665831667280100200800002008000026736267441180201100990100100800008000001008002019008001900263800406159431915110416442671113135800001002671526928267392684626716
802042673620010100000670032672137710225801001008013010080000500116799512671126736267361665931667280100200800002008000026736267361180201100990100100800008000001008002019008005910221800396019431915110416432673313130800001002671626738268682681726741
80204267142001010100021013267213077225801001008000010080000500117243702668926737267141665931667280100200800002008000026737267361180201100990100100800008000011008001919008005900161800406159431915110416442673313135800001002673726737267722682826737

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire (01) | cycle (02) | 03 | 09 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80025267222001000411012670721818142580010108000010800005011680001266970267222672216667316705800102080000208000026722267221180021109101080000800000108000003980035260800356135050200311628272671966280000102672326723267232672326732
800242674920000004110126707218181225800101080000108000050116760512669702672226722166523166888001020800002080000267222672211800211091010800008000011080000039800350358003561353950200261615262672566280000102672326723267232670926727
80024267272000000411012671221801225800101080000108000050116760512669702672726722166523167028001020800002080000267222672211800211091010800008000001080000039800352398000061354350200261616242672566280000102672326723267232670926737
80024267262000000501002670721218025800101080000108000050116654512669702672226722167883167028001020800002080000267272672211800211091010800008000001080000008003534180035613539502002516272626724610280000102672326723267232670926734
8002426722200000001012670701818122580010108000010800005011676051266970267082672216667316702800102080000208000026722267221180021109101080000800000108000003980035538003561353950200231617262671906280000102670926723267232670926728
80024267221990000410002670721801225800101080000108000050116760512669702672226722166673167028001020800002080000267302672611800211091010800008000001080000039800352358003501353950200271627282671966280000102672326723267092672326732
8002426722200000041101267072181811258001010800001080000501172240126697026722267221666731670280010208000020800002672226722118002110910108000080000010800000398003533580035603543502002816262626705106280000102672326729267232672326728
800242672220000004100126693218181225800101080000108000050116760512669702672226727166673167028001020800002080000267222670811800211091010800008000001080000039800352358003561353950200281617292671966480000102672326709267232672326733
80024267282010000411012670721818162580010108000010800005011676051266970267222672716672316702800102080000208000026722267081180021109101080000800000108000000800001478003561353950200161615262671966280000102672326723267232672326823
80024267272000010411012670721801125800101080000108000050117224012679102672226722166673166888001020800002080000267222672211800211091010800008000001080000039800351358003561353950200141626172671966280000102672326709267232672326737