Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDR (register, sxtw, S)

Test 1: uops

Code:

  ldr s0, [x6, w7, sxtw]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | 92 | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
100540331000067123883772025100010001000154800378403403225326010001000200040340311100101000100011020194310590026410406159431907321622402131351000404406403403404
100440331110067023882771925100010001000155551378403402225326110001000200040240311100101000100001019204310591026310396159431917321622403131351000404404406403404
100440331110066033883971925100010001000155471377403403226326010001000200040340311100101000100001021224310590016110406159431927321622400131351000404403404404404
100440331010066023883771925100010001000155550377403403226326010001000200040340311100111000100001019204310601016310406158431917321622400131351000405404403404404
100440331000067023883772025100010001000154800378403403226326010001000200040340311100101000100001020194310590016010406160431907321622404131351000403404404404404
100440331010066033883771925100010001000155260378402403225326110001000200040340311100101000100001020204310581016110406158431927321622399131351000403405404404404
100440231000066023873772025100010001000155670379403402225326110001000200040240311100111000100001020204310581006110406159431907321622399131351000403404404404404
100440331110066033883771925100010001000155550377403403226326010001000200040340311100111000100001020204310590006110396159431917321622400131351000404403403404404
100440321010066133883772025100010001000155550378402403225326110001000200040340311100101000100011019204310580006110406158431917321622400131351000403404404404404
100440331000066023883772025100010001000155080378403403225326110001000200040340311100101000100001019204310590016010396159431907321622411131351000403404404404404

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr s0, [x6, w7, sxtw]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01) | cycle (02) | 03 | 08 | 0b | 0e | 0f | 19 | 1e | 22 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | dcache load miss (bf) | c2 | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
502051200518990010010120039119513109467256010340100100011000030100100001000010790355736380613631711200590120054120054113141311367150100302001000010000602002012810000120056120052115020110099100401001000010000110010000011000000100001100032103107111196584000213091000040100120052120052120036120052120052
502041200358990000010120036119493109467256010340102100011000030100100001000010788625736380613631711200300120051120051113149311365850100302001000010000602002000010000120035120051115020110099100401001000010000010010000011000012410000100003210110111119661400021310121000040100120052120036120036120036120055
502041200358990000010012002011951310946725601034010210001100003010010000100001079035573545561366571120011312005412005411314131136585010030200100001000060200200001000012005112003511502011009910040100100001000011001000001100000010000000003210110712119661400021313121000040100120036120055120055120052120052
5020412005189900000111200391194931094642560100401021000110000301001000010000107900857362366136368112003001200351200541131493113671501003020010000100006020020000100001200541200511150201100991004010010000100000100100000110000001000010000321011071111964740000013121000040100120102120093120055120052120055
5020412005489900000111200361195131094672560103401021000110000301001000010000107900857362366136317112002701200541200541131493113671501003020010000100006020020000100001200541200541150201100991004010010000100000100100000110000001000001000321011071111965840002131391000040100120055120052120052120052120052
5020412005189900000111200361195061094642560103401001000110000301001000010000107886257363806136317112002701200541200511131413113671501003020010000100006020020000100001200541200511150201100991004010010000100000100100000110000001000001000321011071111965840002131301000040100120036120055120052120055120052
5020412003590000110011200391195101094642560103401021000010000301001000010000107900857362366136317112001101200351200511131463113668501003020010000100006020020000100001200511200511150201100991004010010000100000100100000110000001000010000321011071111966140002130121000040100120036120052120055120055120055
5020412005489900000611200361195131094672560103401021000110000301001000010000107903557363806134461112003001200511200511131493113671501003020010000100006020020000100001200511200511150201100991004010010000100000100100000110000101000011000321011011111966140002131091000040100120057120131120052120132120148
502041201398991100011120039119510109464256010040102100011000030100100001000010790355736380613446111200140120035120051113149311365850100302001000010000602002000010000120035120095115020110099100401001000010000010010000011000006100001100032372126111198324000010091000040100120052120037120055120036120036
5020412005189900000120120039119510109467256010340100100011000030100100001000010790355736524613647011200270120054120056113146311366050100302001000010000605922000010000120035120035115020110099100401001000010000010010000011000000100010000032101107111196584000201301000040100120055120055120052120036120055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0057

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | cf | d5 | d6 | d9 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50025120057899110001010001200451194991094732560013400121000210000300101000010000107946357365246133968012003301200411200571131783113703500103002010000100006002020000100001200601200411150021109104001010000100000101000121100030111000011111031403107042119672400041310121000040010120058120061120058120058120058
50024120057899100000021001200451195181094702560016400141000110000300101000010000107963857366686133216012003601200571200601131783113696500103002010000100006002020000100001200601200571150021109104001010000100001101000220100010111000111111031402107042119675400021313121000040010120061120042120042120061120042
50024120060899111000021001200361195181094552560016400141000110000300101000010000107963857366686133968012001701200601200601131593113699500103002010000100006002020000100001200581200571150021109104001010000100000101000221100010111000011112031402107042119675400041310121000040010120061120061120058120061120045
5002412006089911000012001120045119518109774256001640029100021000030010100001000010796385736668613396801200360120060120060113178311369950010300201000010000600202000010000120041120041115002110910400101000010000010100031110002031100001111113140210704311967240004100121000040010120042120061120042120061120061
5002412004189910100002001120042119518109455256001340012100021000030010100001000010794635736524613396801200360120060120060113178311369950010300201000010000600202000010000120041120057115002110910400101000010000010100012010002001100001101103140210706611967240004131391000040010120058120061120042120058120061
500241200578991001000200012004511951510947025600164001410002100003001010000100001079611573692361377571120017012006012006011315931136995001030020100001000060020200001000012005912008411500211091040010100001000001010003301000210110000111101314021071221196754000401301000040010120042120042120061120042120042
50024120057900101000070001200261195181094732560016400121000110000300101000010000107963857357506133216012003301200601200571131593113699500103002010000100006002020000100001200571200571150021109104001010000100000101000320100020111000011110031402107022119675400061310121000040010120061120061120061120042120042
500241201139001001000110001200451195181094732560013400121000210000300101000010000107945357366636134858012001701200601200411131783113699500123002010000100006002020000100001200601200571150021109104001010000100000101000221100020011000001111031402107022119675400041313121000040010120061120042120042120061120042
5002412006089911110002000120045119518109473256001640014100011000030012100001000010796305736663613397901200360120060120041113159311369950010300201000010000600202000010000120041120041115002110910400101000010000010100011110002001100001111003140310702211965640004013121000040010120061120061120061120061120061
500241200608991111000110012002611951810947025600164001410001100003001010000100001079611573652461332160120017012006012005711317831136965001230020100001000060020200001000012006012005711500211091040010100001000001010002111000300110000111120314061070221196724000213091000040010120058120061120061120061120058

Test 3: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldr s0, [x6, w7, sxtw]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire (01) | cycle (02) | 03 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | dcache load miss (bf) | c2 | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50205120047899001000433010012003211951910944925601034010010001100003010010000100001078999573618861338181120011120050120047113145311366750100302001000010000602002000010000120050120047115020110099100401001000010000110010000011000000010000110003210110111119657400029051000040100120051120051120051120054120036
502041200508990000001200000120035119509109463256010040102100011000030100100001000010789995735455613381801200261200501200501131451611363650100302001000010000602002000010000120050120035115020110099100401001000010000010010000011000000010000110003210110111119657400026681000040100120051120051120051120051120036
50204120050899001100292000012002011950910946325601034010210000100003010010000100001078862573618861344610120023120148120042113146311365850100302001000010000602002000010000120050120047115020110099100401001000010000010010000011000010010000110003210110111119657400026601000040100120036120036120048120048120048
5020412004789900000085010012002011949310944925601004010210000100003010010000100001078999573618861344610120023120050120050113146311365850100302001000010000602002000010000120035120047115020110099100401001000010000010010000001000000010000110003210113511119657400000051000040100120048120048120051120075120051
5020412004889900010094010012003211951310946125601034010010000100003010010000100001079026573545561338180120026120050120050113145311366850100302001000010000602002000010000120035120047115020110099100401001000010000010010000011000010010000010003210110711119657400026681000040100120051120051120051120048120048
5020412003589900000085000012002011949310944925601034010210001100003010010000100001078999573545561361660120031120035120050113145311363650100302001000010000602002000010000120035120035115020110099100401001000010000010010000011000000010000010003210110111119646400029081000040100120051120051120051120036120036
502041200358990000003390000120020119509109461256010340102100011000030100100641000010789995736188613644901200141200501200351131451511363750100302001000010000602002000010000120035120047115020110099100401001000010000010010000001000010010000110003210113511119657400009901000040100120051120051120051120051120054
50204120050899000000280100120082119493109449256010340102100001000030100100001000010789995736188613446101200261200501200351131453113667501003020010000100006020020000100001200351200471150201100991004010010000100000100100342010026128537110034112003897231811120956402279951000040100120048120048120131120036120051
5020412005089911002125359519361011231531210091103448776043340387100631004834521115741124211452255809843619210101200261200351200501133473871153335457633441100001000060200212961016312005012060717150201100991004010010000100000100100460110023107529010010110003210231034122932402790981000040100124275123874122760123460123376
502041231779591011031010012003511950910944925601034010210000100003010010000100001078999573618861338180120026120050120052113141311366750100302001000010000602002000010000120050120047115020110099100401001000010000010010000001000090010000000003210113511119660400020901000040100120036120048120051120051120051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0053

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
500251200539431101018161114081011207731201231094664646010040057100341002630010108451079210795755773634615959001207491215541201861136383114511500103242510854101436290020000100001204361200412150021109104001010000100001101000220100030141000011110003140510732119668400026601000040010120042120044120043120145120057
5002412005389911100000200011232951211471106581007604244028710074100723509111885117731160917582198262277710122647123425123428114207457115589585033584711974119926389820106100001200821200571150021109104001010000100001101000331100020211000011010013140510711119671400046601000040010120054120057120054120057120054
500241200538991111000010100120038119511109466256001640012100021000030010100001000010795755736476613376401200291200531200411131713113692500103002010000100006002020000100001200591200531150021109104001010000100000101000331100010111000011110003140610711119668400040051000040010120042120054120054120054120054
50024120053900120100001430001120026119511109455256001640014100011000030010100001004910795755735750613376401200291200561200531131713113692500103018010000100006002020000100001201091200551150021109104001010000100000101000221100020111000011011003140310711119671400106651000040010120057120057120142120240120054
500241200538991001000020000120038119511109752256001340014100021000030010100001000010796025736572613376401200321200561200531131593113692500103002010000100006002020000100001200531200531150021109104001010000100000101000111100011041000011110003140210711119668400026951000040010120057120057120042120054120054
5002412005389910110000100011200381195141094662560016400141000210000300101000010000107957557363326133764012002912004112005311317416113695500103002010000100006002020106100001204461201541150021109104001010000100000101002311100010111000011111003140410722119668400040651000040010120057120057120060120061120057
500241200418991000000020000120026119499109469256001640014100021000030010100001000010796025736476613321601200291200531200411131713113692500103002010000100006002020000100001200761200531150021109104001010000100000101000111100021011000011111003140210711119674400040651000040010120238120054120057120054120042
5002412004189911110000289264101120038119511109466256001640012100021000030010100001000010796025736332613376401200291200531200531131743113695500103002010000100006002020000100001201021200541150021109104001010000100000101000211100010011000011110003140310711119668400046651000040010120057120057120057120054120140
500241200538991011000120100120038119514109469256001640030100021000030010100001000010795755736332613376401200291200531200531131593113692500103002010000100006002020000100001200881200531150021109104001010000100000101000221100010011000011011003140410721119671400049601000040010120054120054120054120054120054
500241200568991010010010101120041119514109466256001640014100021000030010100001000010796025735750613391701200321200531200531131713113695500103002010000100006002020000100001200741200531150021109104001010000100000101000321100020011000001112003140410722119668400046651000040010120054120042120054120054120057

Test 4: throughput

Count: 8

Code:

  ldr s0, [x6, w7, sxtw]
  ldr s0, [x6, w7, sxtw]
  ldr s0, [x6, w7, sxtw]
  ldr s0, [x6, w7, sxtw]
  ldr s0, [x6, w7, sxtw]
  ldr s0, [x6, w7, sxtw]
  ldr s0, [x6, w7, sxtw]
  ldr s0, [x6, w7, sxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3340

retire (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020526723200001631012671221800258010010080000100800155001177116126682268282671216756616679801142008002420016004826722267221180201100991001008000080000010080000398000003580035613543011151180160026724604800001002672826728267282672826728
8020426707200000600012671200121625801001008000010080015500115981412668226859267291666261667980115200800242001600482670726722118020110099100100800008000001008000008000003980000603543011151180160026724002800001002687726726267282672326728
802042672720000093001266922121216258010010080000100800155001177116126702268122672916662616681801162008002420016004826722267221180201100991001008000080000010080000398003900800356035430111511801600267191062800001002673326733267232672326723
802042671020000060100266922181216258010010080000100800145001167875126682267222670716650616678801152008002420016004826727267221180201100991001008000080000110080000080039008000061350011151180160026704664800001002672326723267282670826728
80204267272000000102266922121216258010010080000100800155001165856126702267442683016657616659801142008002420016004826727267071180201100991001008000080000010080000398000003980035610430111511801600267041004800001002672826708267232672326728
802042670720000051002266922120025801001008000010080015500116730312668226838267121666161665980115200800242001600482670726722118020110099100100800008000001008000039800390398000061393901115118016002672410104800001002674126734267232671926728
802042672220000010100226692012121625801001008000010080200500116585612670226799267331664161667980115200800242001600482672226707118020110099100100800008000001008000039800000398003900394301115118016002671910102800001002670826708267232672326728
80204267272000009610026692218181225801001008000010080015500116787512670226801267131664161665980114200800242001600482672726707118020110099100100800008000001008000039800390428003900043011151180160026724662800001002672826728267082672826708
802042672720000063102266922120162580100100800001008001550011673031266972684726712166586166798011420080024200160048267272670711802011009910010080000800000100800003980039039800406104301115118016002670410104800001002683226708267292672726735
8020426727200001540022671201212162580100100800001008001550011658561266822680526813166616166828011420080024200160048267222672211802011009910010080000800001100800003980039039800396135430111511811600267191004800001002672826728267232670826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
800252673720011000023800326722200192580010108000010800005011739752671226736267191668931671680010208000020160000267362673711800211091101080000800001108002120438001900160800406119431910502021633267341313080000102673726737267152673826716
80024267152001111008100126721307025800101080000108000050116721926711268362684616688316695800102080000201600002673626736118002110901010800008000001080020200800190006180040011943192050203162226712013580000102671626737267372673826737
80024267142001001006610326721307202580010108000010800005011653042668926827267441698431681680010208000020160000267372673611800211090101080000800000108001920080058000218004061580191050202162226733130580000102673726716267372673726738
800242673720010110067102266990002125800101080000108000050116696026711268432672316690316694800102080000201600002673626737118002110901010800008000001080019204480019001608004161590191050202162226712130580000102673826737267372673726738
800242673620011010067103266993771258001010800001080000501173975267122685226723166953167238001020800002016000026737267361180021109010108000080000010800201943800591016180000615943191050203162226711130080000102673726737267152673726738
8002426736200111000671002670000002580010108000010800005011739752671226853267201668831669580010208000020160000267362673611800211090101080000800000108001919438005910021800396159431910502021622267111313580000102671526737267402673726738
800252671520011110021102267222770258001010800001080000501167219266922673426740166813167188001020800002016000026736267151180021109010108000080000010800191943800590016180040005943191050203163326734130080000102673826738267382671626737
80024267362001010002100126721370202580010108000010800005011685832671126846267231708131672980010208000020160000267142671511800211090101080000800000108002020080019000618003960590193050202163226734013580000102673826716267372671526737
80024267152001000002110126721307025800101080000108000050117397526689268762674816690316716800102080000201600002673626736118002110901010800008000001080019214380059101618003961190180050202162226712013080000102673826716267382671626737
800242673620010100066002267213770258001010800001080000501173975267112674426863167073167178001020800002016000026714267361180021109010108000080000110800201943800590026180000601943190050203162226712013080000102671626738267382673726737