Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDR (register, sxtw, D)

Test 1: uops

Code:

  ldr d0, [x6, w7, sxtw]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | 92 | inst fp/simd load (98) | inst ldst (9b) | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
100539430010004410138321119251000100010001552603784034032253260100010002000403402111001010001000102020431059000381038613944007311611395141071000399399399399399
10044033000011010137921119251000100010001498903733983942213256100010002000398394111001010001000100004310380006110416158431927311611400131351000404404404403404
10043943000000450013832111925100010001000150370373398398221325610001000200039439411100101000100010000431038000391038613844007311611391141441000395399399396399
10043983000000450013832121192510001000100015555037840340322632601000100020004034021110010100010001019194310581016010406159431907311611400131351000404403404404404
100439830000004410138321119251000100010001555503784034032253261100010002000403402111001110001000101919431060100381038613844007311611395101071000399399399399399
100439830000005300138321119251000100010001527403733983982213256100010002000398394111001010001000100004310380006110396158431807311611400131351000404404404404404
10043983000000440013832111925100010001000152080373402398221325610001000200039839411100101000100010000431038000381038613844007311611391141071000395395399399399
10043943000000440013832111925100010001000152080373398398221325410001000200039839411100101000100010000431039000381038613843007311611391141441000395399399395399
10043982000000440013832111925100010001000152670373398398221325610001000200039839411100101000100010000441038000381038613844007311611395141471000399399399399399
10043983000000440013792111925100010001000152740373398398221325610001000200039839411100101000100010000441038000381038613844007311611395141471000395399399399399

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6, w7, sxtw]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire (01) | cycle (02) | 03 | 07 | 0a | 0e | 0f | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | a9 | ac | af | b5 | dcache load miss (bf) | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50205120047899000110001200351195291094612560103401021000110000301001000010000107899957361886133818012002301200501200471131453113658501003020010000100006020020000100001200501200471150201100991004010010000100000100100001100000001000011003210210722119657400020601000040100120102120074120057120051120036
50204120050899000000101200351197791094852560103401021000010000301001000010000107886257361886133818012001101200471200351131413113636501003020010000100006020020000100001200351200471150201100991004010010000100000100100001100000001000011003210213522119660400029051000040100120104120078120051120036120051
50204120035899000000001200351195111094492560103401001000110000301001000010000107899957361886136166012002601200351200501131433113658501003020010000100006020020000100001200471200351150201100991004010010000100000100100001100000001000001003210213522119661400009081000040100120110120091120052120036120036
502041200358990000210101200201195721094632560103401021000110000301001000010000107886257361886133818012001101200501200501131453113667501003020010000100006020020000100001200351200471150201100991004010010000100000100100001100000001000011003210210132119657400029001000040100120048120092120061120052120051
50204120050899000010101200351195221094612560103401001000110000301001000010000107920057361886136166012001101200501200471131413113658501003020010000100006020020000100001200511200471150201100991004010010000100000100100000100001001000011003210213522119646400020651000040100120107120087120054120051120048
50204120047899000010001200321195641094612560103401001000010000301001000010000107920057361886133818012002301200351200361131483113667501003020010000100006020020000100001202341201431150201100991004010010000100000100100031100050001000011003210210122119658400009901000040100120048120105120071120055120048
50204120050899000010101200201195571094612560103401021000010000301001000010000107886257360446134461012002901200471204171131413113669501003020010000100006020020000100001200751200471150201100991004010010000100000100100000100000001000011003210213522119657400000051000040100120048120152120052120048120036
50204120050899000010001200351195091094632560103401001000010000301001000010000107886257361886133818012002601200501200351131413113658501003020010000100656020020000100001200351200471150201100991004010010000100000100100001100000001000001003210210122119658400029001000040100120095120066120051120051120036
50204120050900000010101200201195731094612560100401021000110000301001000010000107886257361886133818012002601200471200471131453113658501003020010000100006020020000100001200501200471150201100991004010010000100000100100001100000001000011003210210723119662400026681000040100120036120099120058120050120051
502041200508990000001012002011951110944953601034010210001100003010010052100001079044573545561338180120187012004812005011314521113658501003020010000100006020020000100001200501200561150201100991004010010000100001100100000100000001000011003210210122119660400096651000040100120048120105120065120052120051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 24 | 3f | 4d | 4f | 50 | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | dcache load miss (bf) | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
500251200518991111001200201194921094700256001340040100021000030010100001000010795175735455613366201200930120035120035113177311369050010300201000010000600202000010000120054120035115002110910400101000010000110100000110000001000011003140171076311966640002131091000040010120036120055120055120055120055
5002412003590000000012002011951210946402560010400121000110000300101000010000107951757362366133662112003031200511200511131722311375850010300201000010000600202010610000120035120051115002110910400101000010000010100000110000401000011003140510733119669400021313121000040010120036120036120036120055120110
5002412005189900000012003911951210944902560013400101000110000300101000010000107951757354556132757012001101200351200541131723113674500103002010000100006002020000100001200541200351150021109104001010000100000101000000100000010000100131403107651196664000001091000040010120052120052120036120052120052
5002412003589900010012003611949210946402560013400121000110000300101000010000107955757362366133662012001101200931200351131723113674500103002010000100006002020000100001200351200511150021109104001010000100000101000001100000010000110031404107441196664000000121000040010120055120052120036120036120055
5002412005489900060012003611950910946402560013400101000410000300101000010000107958457362366138103012002701200511200511131693113690500103002010000100006002020000100001200511200351150021109104001010000100000101000000100000010000110031405107541196694000201001000040010120036120052120052120055120052
50024120051899000010120039119492109464013560013400121000110000300101000010000107951757362366133662112001101200511200511131533113690500103002010000100006002020000100001200511200511150021109104001010000100000101000000100000010000010031405107551196664000001091000040010120036120052120052120102120055
5002412005489901000012002011949210946402560013400101000110000300101000010000107958457363806133662112002701200351200511131693113690500103002010000100006002020000100001200511200361150021109104001010000100000101000000100000010000010031405107641196694000001391000040010120055120036120036120036120052
50024120035900000100120036119509109464025600134001010001100003001010000100001079517573623661346450120011012003512005111316931136745001030020100001000060020200001000012005112005111500211091040010100001000001010000011000000100001000314031074411966640002101091000040010120055120055120055120055120055
500241200518990001001200361195121095270256001040010100011000030160100001000010795575736236613366201200110120054120051113172311369050010300201000010000600202000010000120035120051115002110910400101000010000010100000110000001000011003140710744119650400020091000040010120036120052120036120052120052
5002412005489900010012002011951210944904160013400121000110000300101000010000107951757370906136144012001101200571200541131723113693500103002010000100006002020000100001200351200511150021109104001010000100000101000001100000010000000031406107741196664000013001000040010120036120055120055120036120036

Test 3: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6, w7, sxtw]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6b | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
5020512005389911100002010011200381195121094552560106401041000210000301001000010000107904457357506136419120031012005312005311313703113670501003020010000100006020020000100001200531200531150201100991004010010000100001100100012110002101911000011111003211210733119660400046651000040100120100120080120057120054120054
50204120053900111000010100112003811951210946625601064010410001100003010010000100001079026573633261364191200170120053120053113148031136705010030200100001000060200200001000012005312005311502011009910040100100001000001001000120100010011000001111003211213532119660400046651000040100120054120054120042120054120054
50204120053899111000020000012003811951210946625601064010410001100003010010000100001079026573633261364191200290120053120053113148031136705010030200100001000060200200001000012005312005311502011009910040100100001000011001000320100010111000011110003211213523119660400040651000040100120054120054120054120054120054
50204120053899111000020000112003811951210946625601064010410001100003010010000100001079026573633261334581200170120053120053113137031136305010030200100001000060200200001000012005312005411502011009910040100100001000001001000121100010011000011111003211310734119660400046651000040100120054120054120054120054120058
50204120054900111000020100112003811951210945525601064010410002100003010010000100001079026573633261334581200290120053120053113148031136705010030200100001000060200200001000012005312005311502011009910040100100001000011001000321100020171000011110003211310733119660400046651000040100120054120054120054120054120054
50204120053899111000020000112003811951210945525601064010410001100003010010000100001079026573575061364191200290120053120053113148031136705010030200100001000060200200001000012005312004111502011009910040100100001000001001000240100020111000001111003211310723119660400026651000040100120054120042120054120054120054
50204120053899111000010000012002611951310946625601034010410002100003010010000100001079026573633261364191200290120053120041113148031136705010030200100001000060200200001000012005312005311502011009910040100100001000011001000211100020111000011110013211413533119660400046651000040100120054120042120054120054120042
502041200419001100000317020241001122357120632110253684603794032510054100543395411372102991133951579420061937891219430122382122413113935015311464956154341811133511415682902279011354122455120055615020110099100401001000010000110010016111003800957811003411113004119437054121676402246651000040100122471122932123236123743120054
50204120053940122002318409427281002123107121747111014126960629405041008410088360851236511822116366258174826211466122976012300212347111322603601146145693834912115531182567940233161172912358612337732150201100991004010010000100001100100032110002021100001010000321131073311965840000101001000040100120052120052120052120036120052
5020412005189900000001000101200201194931094642560103401021000110000301001000010000107900857362366136317120011012005112003511314603113658501003020010000100006020020000100001200511200511150201100991004010010000100001100100000010000000100000010000321131072311964640002101091000040100120052120052120052120052120036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50025120056899000001100120035119510109461256001340017100011000030010100001000010796385737783613376411200171200531200891131710311369250010300201000010000600202000010000120053120053115002110910400101000010000010100013110002011100001111003140710711119656400026681000040010120054120054120073120073120042
50024120041899100002001120041119543109469256001640012100021000230010100001000010796025736332613376401200321200561200411131590311369550010300201000010000600202000010000120056120053115002110910400101000010000010100012110002011100001111003140110711119671400049001000040010120054120088120096120061120057
50024120056899111007000120041119691109467406001640014100021000030576100001000010796025735750613376411200321200631200561131740311369250010300201000010000600202000010000120061120041115002110910400101000010000010100012010002001100001101103140110711119671400046681000040010120054120122120061120057120042
50024120056899101001000120041119537109455256001640014100011000030010100001000010796025736476613321611200171200411200571131740311368050010300201000010000600202000010000120056120053115002110910400101000010000010100012110002001100001101003140110711119671400040681000040010120042120095120079120058120054
50024120041899100001100120041119511109515256001340014100011000030010100001000010796025736476613321611200321200411200561131740311368050010300201000010000600202000010000120041120041115002110910400101000010000010100014110003011100001101003140110711119656400026051000040010120054120116120058120054120042
50024120056899101002000120026119526109466256001640012100021000030010100621000010794635736332613321611200291200411200411131740311369650010300201006610000600202000010000120063120053115002110910400101000010000010100013110001001100001111003140110721119656400049981000040010120057120120120060120054120057
50024120056899100002000120086119502109455256001640014100021000030010100001000010794635736476613391711200171200561200531131740311369550010300201000010000600202000010000120053120053115002110910400101000010000010100013110002014100000111103140110711119668400040981000040010120105120079120058120057120088
50024120056899110000000120020119510109449256004840018100001000030010100001000010795485735455613275711201071200351200471131530311368650010300201000010000600202000010000120035120047115002110910400101000010000010100000110000000100000010003140110711119665400020681000040010120051120101120055120051120051
50024120035899000000000120037119509109463256001040010100011000030010100001000010795175735455613275711200231200501200351131680311367450010300201000010000600202000010000120050120047115002110910400101000010000010100000110000000100001000003140110711119650400029981000040010120048120107120049120048120051
50024120050899000001000120035119492109463256001040012100011000030010100001000010795485735455613275711200261200351200501131680311368950010300201000010000600202000010000120047120047115002110910400101000010000110100000110000100100001000003140110711119650400000651000040010120048120117120036120048120051

Test 4: throughput

Count: 8

Code:

  ldr d0, [x6, w7, sxtw]
  ldr d0, [x6, w7, sxtw]
  ldr d0, [x6, w7, sxtw]
  ldr d0, [x6, w7, sxtw]
  ldr d0, [x6, w7, sxtw]
  ldr d0, [x6, w7, sxtw]
  ldr d0, [x6, w7, sxtw]
  ldr d0, [x6, w7, sxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3340

retire (01) | cycle (02) | 03 | 09 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80205267232001000450012671001212152580100100800001008001450011683321266970267072672716635616679801162008002420016004826722267071180201100991001008000080000110080000000800390035800356103911151180160026724002800001002672726713267232672826728
802042672520000004110226850212121225801001008000010080016500116743112670402670726722166506166598011420080024200160048267222672211802011009910010080000800000100800000390800000038800396100111511801600267041004800001002672326708267282672826708
8020426727200001000022669720124258010010080000100800145651173997126697026722267071665061666280115200800242001600482670726722118020110099100100800008000001008000003908003900398003901353911151180160026727062800001002673026728267282671226727
80204267272000000451002671621201625801001008000010080014500116595312668202672726727166356166598011520080024200160048267072672211802011009910010080000800000100800000390800390008000061039111511801600267241062800001002672826728267282670826728
8020426722200000001012683720012498010010080000100800145001177260126682026727267071665561667980114200800242001600482670826789118020110099100100800008000001008000003908003900080039003543111511801600267041060800001002672826708267082672826723
80204267272000000450022672521200258010010080000100800155001166596126682026727267271665561667980115200800242001600482672726722118020110099100100800008000001008000004308003900080039603543111511801600267241064800001002672826728267282672826708
8020426722200000045101267242180162580100100800001008001550011678751267020267272672716635616679801152008002420016004826727267221180201100991001008000080000010080000039080039003580035013543111511801600267246104800001002672326708267082670826708
80204267472000000451022672021218025801001008000010080014500117711612670202678826783166556166598011520080024200160048267632672211802011009910010080000800000100800000008003900398000061350111511801600267041060800001002672826728267232672326723
80204267072000000450012669721812162580100100800001008001550011771161267020267272672216655616674801142008002420016004826722267221180201100991001008000080000010080000039080039003580039613939111511801600267241064800001002670826728267282672826708
80204267072000000450002671301201225801001008000010080015500116787512668202672726727166356166598011420080024200160048267072672211802011009910010080000800000100800000008003900358003960043111511801600267061060800001002672326708267282677126727

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cd | cf | d0 | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8002526727200001451012670021201625800101080000108000050116675000266832672726727166723167078001020800002016000026728267081180021109101080000800001108000000800390080039003943050200041643267241010480000102672826729267292672926728
800242672720000045001267152000258001010800001080000501166896012670226731267081667231670880010208000020160000267082672711800211091010800008000001080000043800000398003961043050200041643267241010080000102672926729267282670926728
800242670820000045101267120121162580010108000010800005011668860026702267282673116672316708800102080000201600002671226727118002110910108000080000010800000438003903980000610430502000416432672500480000102672826728267282670926729
8002426708200000451002671021212025800101080000108000050116675000267032670826727166523166888001020800002016000026727267271180021109101080000800000108000000800390398003961394305020004164426724100480000102672826709267292672926728
80024267272000000101267202121219258001010800001080000501165883012668326708267281665231670780010208000020160000267082671111800211091010800008000001080000043800390946801690104305020004164426724010480000102670926728267092672826729
800242670820100001002669301200258001010800001080000501166886012670326708267271667231668880010208000020160000267282670811800211091010800008000001080000043800390398000000394305020003164326705010480000102672926729267282672826729
8002426728200000451012671220120258001010800001080000501166750002668326728267081667631670780010208000020160000267282672711800211091010800008000001080000008003900800390004305020004164426728100480000102672826728267292670926709
80024267272001000000267130121202580010108000010800005011668960126703267272672716672316707800102080000201600002672726708118002110910108000080000010800000438003903980000613943050200041644267251010480000102672826728267282672826729
80024267312000000100267120000258001010800001080000501168843002670326727267271667231670780010208000020160000267082672711800211091010800008000001080000043800390080039010005020004164326705100480000102670926709267292672826728
8002426727200011451002671801212162580010108000010800005011688430126683267282672816672316711800102080000201600002672826708118002110910108000080000110800000438003903980039603943050200041643267251010480000102672826729267092672926728