Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDR (register, S)

Test 1: uops

Code:

  ldr s0, [x6, x7]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01)cycle (02)030508090b0e0f1e223a3f4346494f51inst issue (52)~issue ld/st (55)~dispatch ld/st (58)huge thing ld/st (5a)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op ld/st (7d)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst fp/simd load (98)inst ldst (9b)9da0a1a3a6a8acafb5b6bbdcache load miss (bf)cfd5d6ddinst fetch restart (de)e0eaebecld/st retires (ed)f5f6f7f8fd
10053892001000411238421818122510001000100014844364399391211324710001000200038939211100110001000110000391035035103561353973116113866621000400400400399400
10043992100100651338421818162510001000100015362373398398221325710001000200039839911100110001000010000391035035103561353973116113866621000399400400399400
10043993100101650338431818162510001000100015315373399398222325610001000200039839911100110001000010000391035035103561353973116113886621000399400400399400
10043993100000650338311818112510001000100014838364389389212324710001000200038938911100110001000010000391035035103561353973116113866621000400400400399400
10043993110100650338421818122510001000100014838364389389212324710001000200038939111100110001000010000391036035103561353973116113866621000390390390391390
10043912000000410137421818122510001000100014838364389389212324710001000200038938911100110001000010000391035035103561353973116113866621000390390390390390
10043892000000410137421818112510001000100014838364391389212324710001000200038938911100110001000010000391035035103561353973116113866621000420401399399400
10043993110000661338421818122510001000100014838364389389212324710001000200038938911100110001000010000391035035103561353973116113866621000400400400400399
10043983100000650338321818122510001000100014838364389389212324710001000200038938911100110001000010000391035035103561353973116113866641000392390390390390
100438930000004101374218181225100010001000148383643903942173252100010002000394389111001100010001100003910350401035613943731161139161041000400399400400399

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr s0, [x6, x7]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01)cycle (02)030708090a0b0e0f18191e1f2223243a3f4d4f5051inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)71scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a7a8a9acafb5dcache load miss (bf)c2cdcfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
5020512005189900000000010000012003611951310946712560103401021000110000301001000010000107903557363806136317012002712005412005111314103113658501003020010000100006020020000100001200511200511150201100991004010010000100000100100000110000000010000110032100110711119661400021310121000040100120036120036120055120103120055
50204120054899000000000100000012002011951010946402560103401021000110000301001000010000107903557363806136317012001112003512005411314103113658501003020010000100006020020000100001200541200511150201100991004010010000100000100100000110000000010000100032100110711119646400001310121000040100120052120055120052120055120052
502041200548990000000006010001200391195101094670256010340102100011000030100100001000010790355735455613647001200301200541200351131460311365850100302001000010000602002000010000120054120051115020110099100401001000010000010010000011000007001000010003210021011111966140002130121000040100120055120036120052120036120051
502041200398990000000001010001200391195131094670256010340102100011000030100100001000010790355736332613631701200271200541200541131410311367150100302001000010000602002000010000120054120052115020110099100401001000010000010010001011000000001000011003210011071111966140000100121000040100120055120055120036120052120053
5020412005489900000000060000012003911951310944902560158401211000210000301001000010000107911657370046136317012002712005412028211314103113668501003020010000100006020020000100001200541200351150201100991004010010000100000100100000010000000010000110032100110711119661400020001000040100120055120036120055120055120055
502041200358990000000000000001200201194931094670256010340102100011000030100100001000010788625735455613446101200301200541200511131460311367150100302001000010063602002012610000120054120066115020110099100401001000010000010010000011000005001000010003210011071111966440000130121000040100120052120058120055120055120052
50204120054930000000000100000120039119493109467025601034010210001100003010010000100001079035573623661344610120030120035120054113146031136585010030200100001000060200200001000012005412003511502011009910040100100001000001001000001100000500100001100321001107111196464000201301000040100120055120036120052120052120059
50204120054899000000000100000120036119510109467025601034010010001100003010010000100001079035573638061344610120030120037120053113146031136715010030200100001000060200200001000012005112010111502011009910040100100001000001001000001100000503100001100321001101211196474000201391000040100120036120055120036120058120052
5020412005489900000100010100012003911951310944902560103401021000110000301001000010000107900857354556136317012003012003512003511314103113658501003020010000100006020020000100001200541200511150201100991004010010000100000100100000010000040010000010032100110711119661400020001000040100120052120055120052120036120052
5020412003589900000000010000012003911951310946701356010340102100011000030100100001000010790355736236613631701200301200541200551131460311408356400334561133911292682752267811294122495122417281502011009910040100100001000001001003600100351629207310039110039410231411119658400101313121000040100121172121491121263120887122288

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0058

retire (01)cycle (02)03050708090a0b0e0f18191e1f22233a3f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a7a8a9acafb5bbdcache load miss (bf)dtlb miss (c1)c2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
500251200579001010010000201001200261194991094702560016400121000210000300101000010000107991757357986134172112001712006012006011317531136805001030020100001000060020200001000012005712004111500211091040010100001000011010002111000100111000011011314031073211967240004131091000040010120061120042120058120061120042
50024120060899101001010020000120098119518109455256001640014100031000030010100001000010799225737484613428011200171200601200411131593113680500103002010000100006002020000100001200411200411150021109104001010000100000101000931100020004100001111231402107221196754000400121000040010120061120061120042120042120061
500241200419001010000000100011200451194991094732560016400121000210000300101000010000107974257366686134121112003612004112006011315931136995001030020100001000060020200001000012004112005711500211091040010100001000001010002211000100141000011011314021072211965640004013121000040010120042120061120061120061120042
50024120060899101000110070001120026119518109473256001340014100011000030010100001000010796745745201613686411200171200601200571131753113696504943002010000100006002020000100001200601200411150021109104001010000100000101000210100020011100000111031401107221196754000400121000040010120042120061120061120061120061
500241200608991000010100200001200451195151094732560016400141000210000300101000010055107955357365726133968112003912006012006011317531136805001030020100001000060020200001000012006112009111500211091040010100001000011010002111000100011000011110314021072211965640004101001000040010120061120061120061120061120061
500241201109001010000000101001200451194991094732560016400141000410000300101000010000107968357365246133968112001712006012004111317831136805001030020100001000060020200001000012005712005711500211091040010100001000001010002111000100111000011110314031072211965640004131001000040010120042120061120042120042120058
500241200419001000000000201001200261195181094732560013400121000110000300101000010000107975557368126133968112003612004112006011317831137025001030020100001000060020200001000012006012005711500211091040010100001000001010001111000300011000011110314021072211965640004101001000040010120042120058120061120058120061
500241201188991010010000200001200451195181094732560016400141000210000300101000010000107967957368606133968112003612006012006011315931136995001030020100001000060020200001000012006012005711500211091040010100001000001010003211000200179100001101031402107221196754000410091000040010120061120061120061120058120061
50024120102899100001100010100120045119518109473256001640014100021000030010100001000010888075736668613474011200361200571200411131783113747500103002010000100006002020000100001200601200411150021109104001010000100000101000111100020002561000001110314021072311967240004100121000040010120061120061120058120061120042
5002412005789910000100001010012002611949910952025600134001410002100003001010000100001079674573652461332161120033120060120057113178311368050010300201000010000600202000010000120057120057115002110910400101000010000010100013110002001641000011011314021072211967240004130121000040010120042120042120043120042120058

Test 3: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldr s0, [x6, x7]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire (01)cycle (02)03050708090a0b0e0f18191e1f22233a3f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a7a8a9acafb5bbdcache load miss (bf)dtlb miss (c1)c2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
5020512005789910100110002010012004511952010947325601064010210002100003010010000100001079089573575061366231120036120041120060113155311363050100302001000010000602002000010000120060120057115020110099100401001000010000110010002111000300111000011110321011071111964640002131091000040100120058120042120061120061120061
5020412004190000000000001010012004011974010950125601034010010001100003010010000100001078862573638061344611120027120054120035113146311365850100302001000010000602002000010000120054120051115020110099100401001000010000010010003111000100171000011112321011071111966440002130121000040100120058120061120061120042120058
5020412005789910000000002000012004511951310946725601034010010001100003010010000100001079035573638061363171120036120060120060113137311363050100302001000010000602002000010000120060120107115020110099100401001000010000110010000011000000001000010100321011071111965840004131001000040100120058120058120061120042120061
5020412004189910110100001000112004511949310946725601004010210001100003010010000100001079035573638061363171120030120054120054113141311367150100302001000010000602002000010000120054120051115020110099100401001000010000010010000001000000001000000000321011011111966140000101001000040100120042120042120058120058120061
50204120060899100110000020000120045119513109467256010040102100011000030100100001000010790085735455613446111200301200351200511131493113677501003020010000100006020020000100001200571200571150201100991004010010000100000100100000110000000010000100003210210711119646400001313121000040100120061120061120061120042120061
502041200609001001010000200001200261195131094492560100401001000110000301001000010000107903557363806136317112001112005412003511314931136715010030200100001000060200200001000012005112003511502011009910040100100001000001001000001100010000100001000032101107111196614000201391000040100120042120061120061120042120061
50204120041899100000000010100120045119516109473256010640102100021000030100100001000010790895736668613662311200331200571200571131523113674501003020010000100006020020000100001200601200411150201100991004010010000100000100100012110003000110000011103210110711119654400040001000040100120058120061120042120042120061
502041200418991011000000200001200261195131094552560103401021000210000301001000010000107946757359426136623112003312006012005711313731136305010030200100001000060200200001000012006012005711502011009910040100100001000001001003211100341838933910034110103945130722121579402341013121000040100122638122581122502122452122518
50204120041899100001000030472112100123038121101110542713605434040810056100603453211266115261145447579982262087281122244122876123548114183311367750100302001005410000647422000011733123472122699331502011009910040100100001000011001005540100480001347711005011110321013001112177540337131001000040100120062120042120061120042122142
502041227449541022000026283701176000012004511951310947025601064010410002100003010010000100001079089573686061366231120017120041120060113155311363050100302001000010000602002000010000120041120041115020110099100401001000010000010010000011000001001000010100321011011111965840000131001000040100120055120036120055120055120055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire (01)cycle (02)030408090a0e0f1e1f2223243f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)71scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a3a6a8acafb5dcache load miss (bf)c2c5branch mispredict (cb)cdcfitlb miss (d4)d5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
500251200518990000001088100120322119536109464256001340010100011000030010100001000010795575736236613275701200271200511200511131690311369150010300201000010000600202000010000120051120051115002110910400101000010000010100001100002010000110200314005107541196664001110091000040010120070120149120052120052120052
5002412005189910101010100120036119510109513256002640012100011000030010100001005710795575735455613366201200271200511200511131530311369050010300201000010000600202000010000120035120035115002110910400101000010000110100001100000310000100000314115107551196664000010001000040010120073120057120036120052120052
50024120051899101000100001200361195091094642560013400101000110000300101000010000107955757362366133662012002712005112005111316903113690500103002010000100006002020000100001200511200511150021109104001010000100000101000011000000100001100003141141076511966640002101091000040010120056120052120036120052120039
500241200548991010001010012003611950910946425600104001210001100003001010000100001079557573623661336620120027120035120051113169031136905001030020100001000060020200001000012005112005111500211091040010100001000001010000110000001000011000031411510756119666400000091000040010120073120055120036120052120052
50024120051899101000001001200361195091094492560013400101000010000300101000010055107955757362366133662012002712005112005111317103113690500103002010000100006002020000100001200351200542150021109104001010000100000101000001000000100001000003141151075511966640002101091000040010120060120053120052120052120052
50024120035900101000100001200361195281094642560013400121000110000300101000010000107955757362366132757012002712005112005111316903113674500103002010000100006002020000100001200511200511150021109104001010000100000101000011000000100001100003141151075511966640002101001000040010120068120052120052120052120052
50024120051899101000001001200361195091094642560013400121000110000300101000010000107955757362366132757012001112003512005111316903113690500103002010000100006002020000100001200511200511150021109104001010000100000101000011000000100001100003140051075611966640002101001000040010120073120053120102120052120056
500241200518991010001010012003611950910946425600104001210001100003001010061100001079557573623661327570120030120051120051113153031136905001030020100001000060020200001000012005112005111500211091040010100001000001010000110000001000001010031400538551196664000210001000040010120067120053120052120052120036
50024120035899101000601001200201195091094642560013400101000010000300101000010000107955757362366132757012002712005112003511315303113690500103002010000100006002020000100001200511200521150021109104001010000100000101000011000000100000100003141151076511966640002101091000040010120088120060120052120052120052
500241200358991010000010012003711950910946446600134001210001100003001010000100001081542573801261342790120027120054120051113153031136745001030020100001000060020201081021812005112005111500211091040010100001000011010010110000015100061100003224161447511975240026101091000040010120052120053120052120058120053

Test 4: throughput

Count: 8

Code:

  ldr s0, [x6, x7]
  ldr s0, [x6, x7]
  ldr s0, [x6, x7]
  ldr s0, [x6, x7]
  ldr s0, [x6, x7]
  ldr s0, [x6, x7]
  ldr s0, [x6, x7]
  ldr s0, [x6, x7]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01)cycle (02)030e18191e223a3f4346494f51inst issue (52)~issue int (53)~issue ld/st (55)~dispatch int (56)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map lookup int (7f)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a8a9acafb5b6bbdcache load miss (bf)c5branch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
80205267332000000132672521812857180100100801301028001650011665961266822672226722166506166748011520080024200160048267222672211802011009910010080000800001100800000398003900358003961353911151180160026731600800001002672826728267282672826728
802042672220000041002669201212025801001008000010080014500116730312670226727267221663561667980115200800242001600482672926722118020110099100100800008000001008000003980000103980035603543111511801601267231002800001002670826728267282672326728
8020426722200000121226725212020258010010080000100800155001177116126702267282670716655616679801142008002420016004826727267221180201100991001008000080000010080000039800390045800396000111511811600267276104800001002673526873267392672826725
8020426727200111541026716318181825801001008000010080014500116659612668226727267271665461665980115200800242001600482672726722118020110099100100800008000001008000003980039003800000135392225129123112672410104800001002672326729267092671326723
802042670820000041022670720181625801001008000010080021500116671412668626727267081664810166688012120080030200160060267272672711802011009910010080000800000100800000398003900398003900354322251281231126724862800001002670926723267092672326728
8020426708199000571226717218121225801001008000010080200500116610812670226727267221664591666680119200800302001600602672726722118020110099100100800008000001008000003980000003580000613943222512912311267190104800001002672826724267232672326723
80204267272000004500267122121216258010010080000100800195001166108126683267282672716625101666780120200800302001600602670826708118020110099100100800008000001008000000800390008003961043222512912312267591002800001002670926723267282670926709
8020426727200000010267122120122580100100800001008002150011594791267022672726708166409166678011920080030200160060267272672811802011009910010080000800000100800000398003910080035613539222512912311268040100800001002672826709267092672326729
802042672820000041022671120016258010010080000100800215001166108126702267282672716640101666680120200800302001600602672826722118020110099100100800008000001008000003980000103980000613639222512812311268371004800001002672826709267282670926709
802042670820000045112670721212162580100100800001008002250011673981267022672726722166459166678012120080030200160060267272672211802011009910010080000800000100800000398003900388003901350222512922312268191064800001002672826709267282672826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire (01)cycle (02)030508090b0e0f1e223a3f4346494f51inst issue (52)~issue int (53)~issue ld/st (55)~dispatch int (56)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)6067696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map lookup int (7f)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a7a8a9acafb5b6bbdcache load miss (bf)dtlb miss (c1)c2c5cfd5d6ddinst fetch restart (de)e0eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
80025267332001111016512267683181816258001010800001080000501165822002670826732267321667731671280010208000020160000267322673211800211091010800008000011080019204280057111598003860584219015020261625252671200280000102673426734267342673426716
80024267322001001006502267170002258001010800001080000501167660002670726733267321667731671280010208000020160000267152671411800211091010800008000001080020194280058000598003860574219105020261625242673009280000102673326733267332673326734
80024267322001011006503267232181815258001010800001080000501167298002670726732267321667731671280010208000020160000267322673211800211091010800008000001080019204280019000598000061574219105020151625202672990280000102673426734267342673426733
8002426732200111000651326718301815258001010800001080000501167660002670726732267321667731671280010208000020160000267322673211800211091010800008000001080020194280057100598000061194219005020261617242673099280000102673326733267332673426733
8002426732201111100651326718018016258001010800001080000501167660002670726733267321667731671280010208000020160000267322671511800211091010800008000001080020204280057201598003801574219105020151625152673099080000102673326733267332673326733
800242673220011110065122671921801625800101080000108000050116582200267082673226732166773167128001020800002016000026732267321180021109101080000800000108002019428001910121800000057019205020261619252672990280000102673326716267342673326733
8002426732200101000210326717218181625800101080000108000050116729800267072673226714166773167128001020800002016000026732267321180021109101080000800001108002020428005710121800386157019105020221625252672999280000102673326715267152673326733
80024267332001111002103268012181816258001010800001080000501170204002670826732267331667731671280010208000020160000267322673311800211091010800008000001080020204280057101598003800574219105020251625252672999280000102673326733267332671526733
8002426732200111100641326717018181625800101080000108000050117020400267072673226733166773167128001020800002016000026732267331180021109101080000800000108001921080057113598003861574219105020251621242672999080000102671626733267342673326733
8002426732200100100651326701218180258001010800001080000501170204002668926714267151667731669580010208000020160000267152673211800211091010800008000001080020214280057000598003861574219005020221625252671299280000102673326733267332673326733