Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDR (register, lsl, D)

Test 1: uops

Code:

  ldr d0, [x6, x7, lsl #3]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | 92 | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
100540430000116710238937102125100010001000155721379404404226326210001000200038240411100101000100001000431042042100030614519731161140301461000404382382405382
1004403211110167001380000202510001000100015315137039937421832571000100020003993951110010100010000100043104104110003142440731161140314061000405383405405405
10044043101000670013590110251000100010001406013493993992183257100010002000399374111001010001000010004310410010413042440731161140101461000405405382405382
10044033100100670003840101725100010001000153251349374399222323210001000200039937411100101000100001000441042041104100424407311611401141461000406382405405382
10044033111000661013842112202510001000100015322137039939919732571000100020003743951110010100010000100001041041104101044073116113710071000375400400400400
10043743000000440013592012025100010001000153151370399374218325710001000200037439911100101000100001000431042141104131044073116114010061000382405383382405
1004404210110021001384200172510001000100015037137439940222232571000100020003743991110010100010000100044104104410000142440731161140114061000382404405387405
100438131101102100038421120251000100010001532213743993991973232100010002000399374111001010001000010004310000010000041007311611401141461000405405405382405
100440531111002100138907720251000100010001557213793813812043262100010002000381403111001010001000010000104104110410004307311611379141461000383405405405405
100440431111006601138501212202510001000100015037134939939919732571000100020003993951110010100010000100043104104110413142007311611401141461000405404383426405

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6, x7, lsl #3]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0055

retire (01) | cycle (02) | 03 | 08 | 09 | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a7 | a8 | ac | af | b5 | dcache load miss (bf) | c2 | branch mispredict (cb) | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
5020512005289901100601012004011949310946525601034010210000100003010010000100001078862573642861344611120011012005512005511315031136725010030200100001000060200200001000012003512005211502011009910040100100001000001001000011000000010000110032100110711119662400021411131000040100120056120056120056120053120056
502041200558990000000101200201194931094682560103401001000410000301001000010000107904457354556136521112003101200351200521131503113658501003020010000100006020020000100001200551200521150201100991004010010000100000100100001100000001000011003240011071111966240000140131000040100120056120056120053120056120056
502041200528990000010101200201195141094492560100401021000010000301001000010000107901757364286134461112002801200351200551131413113669501003020010000100006020020000100001200551200521150201100991004010010000100000100100001100000001000011003210011011111965940002014131000040100120056120070120056120056120056
502041200558990000000101200401195111094652560103401021000010001301001000010000107901757362846136368112003101200551200351131503113669501003020010000100006020020000100001200551200521150201100991004010010000100000100100001100000001000011003210011011111965140002014131000040100120056120056120036120056120056
5020412003589900000101012004011949310946825601034010210001100003010010000100001078862573628461344611120031012005512005211315031136725010030200100001000060200200001000012003512005211502011009910040100100001000001001000001000000710000110032100210711119646400021411101000040100120056120056120053120053120056
5020412003589900000101012002011951410946825601034010210001100003010010000100001078862573545561365211120011012005512003511315031136725010030200100001000060200200001000012005512005211502011009910040100100001000001001000011000000010000010032100110111119662400001414131000040100120036120036120056120056120056
50204120052899000001010120020119514109468256010340102100011000030100100001000010790445736284613652111200310120055120035113155311366950100302001000010000602002000010000120055120035115020110099100401001000010000010010000110000000100001100332701107111196594000201401000040100120056120056120056120056120036
5020412003589900000101012004011951110947325601064010010001100003010010000100001078862573642861363681120031012005512003511314731136585010030200100001000060200200001000012005512005211502011009910040100100001000001001000011000000010000110032100110711119649400001411101000040100120056120056120039120036120086
502041200358990000010001200201194931094492560103401021000110000301001000010000107904457364286136521112003101200551200551131503113675501003020010000100006020020000100001200521200351150201100991004010010000100000100100001100000001000010003210011071111966240002011131000040100120036120056120053120036120056
5020412003589900000100112002011951410946825601004010210001100003010010000100001079044573642861363681120028012005512005511315031136695010030200100001000060200200001000012003512010011502011009910040100100001000011001000011000000010000110032100110711119646400001411131000040100120056120036120053120056120056

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0055

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 1e | 22 | 23 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
500251200589000001111012002011951310946525600134001210001100003001010000100001079566573642861327570120028120035120052113170031136745001030020100001000060020200001000012003512009311500211091040010100001000001010000011000000100001010003140210733119670400021411131000040010120036120036120056120056120056
500251200358990000000012004011949210946825600134001210001100003001010000100001079566573642861338660120028120052120055113173031136945001030020100001000060020200001000012012112005511500211091040010100001000011010000011000020100001010003140210734119670400001111131000040010120056120036120053120056120056
50024120035899000001001200401195131094492560013400121000010000300101000010000107956657362846132757012003112005512003511317303113691500103002010000100006002020000100001200551200521150022109104001010000100000101000000100000010000101000314031073311967040000140131000040010120089120041120036120036120036
5002412006489900000010120040119510109468256001340012100001000030010100001000010795175735455613371301200281200551200351131730311369150010300201000010000600202000010000120055120052115002110910400101000010000010100000110000001000010100031403107321196504000200131000040010120056120036120056120069120102
500241200358990000011012004011951310946825600134001010000100003001010000100001079517573545561337130120028120035120052113170031136915001030020100001000060020200001000012003512005211500211091040010100001000001010000011000000100000010003140210723119667400021414101000040010120056120036120056120036120036
500241200558990000001012004011951310946825600134001210001100003001010000100001079593573642861327570120011120052120055113173031136945001030020100001000060020200001000012005212005211500211091040010100001000001010000011000000100001010003140410733119650400021414131000040010120056120056120036120053120056
500241200558990000011012002511951310946825600134001010001100003016210000100001079566573628461327570120011120035120052113173031136745001030020100001000060020200001000012005512005211500211091040010100001000001010000001000000100000000003140310723119670400021411131000040010120056120036120056120036120036
50024120055899000001101200401195101094492560010400101000110000300101000010000107959357354556133713012003112005512003511315303113674500103002010000100006002020000100001200351200521150021109104001010000100000101000001100001010000101000319731071411974940000140101000040010120037120054120036120144120136
500241202559000000011012004111951310946825600134001010001100003001010000100001079593573545561337130120031120058120055113173031136945001030020100001000060020200001000012005812005211500211091040010100001000001010000011000017253100031010003140310743119670400021414101000040010120053120054120053120053120056
500241200578990001001012004011949210946925600104001210000100003001010000100001079517573545561338660120092120035120055113153031136795025430020100001000060020200001000012005512005211500211091040010100001000011010000011000013100001010003140310742119670400021411101000040010120056120056120053120053120041

Test 3: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6, x7, lsl #3]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0052

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | dcache load miss (bf) | c2 | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
5020512005889900001100101012003711951110946525601034010210000100003010010000100001079017573628461387431120011012005212005211314731136695010030200100001000060200200001000012005212005211502011009910040100100001000001001000001100000001000011003210110711119659400021111101000040100120053120053120053120053120053
502041200528990000001110101200201194931094492560103401021000110000301001000010000107888957364286148780112002801200521200521131413113669501003020010000100006020020000100001200521200521150201100991004010010000100000100100000110000000100000000321011071111965940002011101000040100120053120053120053120053120053
502041200528990000000060101201381194931094652560103401001000110000301001000010000107901757362846139374012001101200351200351131473113658501003084810000100006020020000100001200351200521150201100991004010010000100000100100000110000003100000000321011071111965940002111101000040100120053120056120054120056120053
5020412005289900001140101012003711949310946525601034010210001100003010010000100001079017573628461376530120028012005212005211314731136695010030200100001000060200200001000012005212009611502011009910040100100001000001001000001100000001000010003269110711119646400021111101000040100120053120053120036120053120054
5020412005289900001000101012003711951110946525601034010210000100003010010000100001079017573974061363370120028012005212005211314131136695010030200100001000060580200001000012005212005211502011009910040100100001000001001000001100000001000011203210110111119646400021111101000040100120053120053120036120053120053
50204120035900000000001300112003711951110946525601034010210001100003067610000100001079017573628461373480120028012005212005211314731136705010030200100001000060200200001000012003512005211502011009910040100100001000001001000021100000001000011003210110111119659400021111101000040100120053120053120053120053120053
5020412005289900000000100112003711951110944925601034010210001100003010010000100001079017573628461391130120011012005212005211314731136695010030200100001000060200200001000012005212005211502011009910040100100001000001001000001100001001000000003210110111119659400001111101000040100120053120053120053120053120036
502041200528991000000010001200371195111094652560103401021000010000301001000010000107886257362846138990012002801200521200521131473113669501003020010000100006020020000100001200351200521150201100991004010010000100000100100000110000000100001000321011011111965940000110101000040100120036120053120053120053120053
502041200528990000101129200112003711949410946725601424010210000100043010010207100001079017574238561399640120028012003512005211314731136695010030200100001000060200200001000012003512005511502011009910040100100001000001001000001100000001000010003210110111119659400021111101000040100120053120053120053120038120059
502041200529010011000531628001120037119511109467256010340102100011000030100100001000010790175736332613719701200280120054120052113154311366950100302001005510000602002000010053120036120152115020110099100401001000010000010010042011004402125678100471100418511071112063540239111101000040100123251122560123897123681123345

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0055

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | a9 | ac | af | b5 | dcache load miss (bf) | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
500251200528990100000000012003711949210946825600104001010001100003001010000100001079566573642861327570120034120035120055113170311369150010300201000010000600202000010063120052120035115002110910400101000010000010100000100000001000010003140610711119650400021111101000040010120083120056120056120056120053
5002412005589901011003700012002011951310946525600134001210001100003001010000100001079517573642861327570120011120055120052113173311369450010302091000010000600202000010000120055120052115002110910400101000010000010100001100000001000011003140110711119650400020001000040010120056120056120056120036120036
5002412003589901000000000120037119510109465256001340010100011000030010100001000010795665736428613371311200111200551200351131533113694500103002010000100006002020000100001200521200521150021109104001010000100000101000011000000010000100032831107111196674000000131000040010120056120056120056120056120056
50024120035899000100010001200371194921094682560013400101000010000300101000010000107951757354556133713012003112005512005511315331136945001030020100001000060020200001000012003512005211500211091040010100001000001010000110000003100000000314011071111966740002141401000040010120056120056120053120036120056
5002412003589900011001000120020119513109468256001040012100011000030010100621000010796115736284613275701200311200551200551131533113674500103002010000100006002020000100001200551200353150021109104001010000100000101000001000000310000100031401107111196704000014001000040010120036120053120053120053120056
500241200528990001000001012002011949210946825600134001210000100003001010000100001079566573545561327571120031120052120035113173311369450010300201000010000600202000010000120055120052115002110910400101000010000010100001100000001000011003140110711119670400021411131000040010120056120056120053120036120056
5002412003589900001001000120020119594109468256001340010100011000030152100001000010815715737948613615511200311225141227191139323114719563213196010000100006002020000100001200521200521150021109104001010000100000101000011000000121000010003140110711119667400001414131000040010120053120036120056120056120036
500241200558990001100100012004011951010946825600104001210000100003001010000100001079593573545561338660120028120052120055113153311369150284300201000010000600202000010000120052120052115002110910400101000010000010100000100000031000011003140110711119670400020001000040010120036120036120053120056120056
50024120055899000000000001200401195131094492560010400121000010000300101000010000107956657362846132757012003212005512005211317331136915001030020100001000060020200001000012005512005211500211091040010100001000001010000010000000100001100314011071111967040002110131000040010120053120036120053120056120056
500241200528990001100100012004011951310946525600134001210001100003001010000100561079593573545561327570120011120035120052113153311369450010300201000010000600202000010000120035120035115002110910400101000010000010100001100001001000211003140110711119667400001414131000040010120056120056120036120056120053

Test 4: throughput

Count: 8

Code:

  ldr d0, [x6, x7, lsl #3]
  ldr d0, [x6, x7, lsl #3]
  ldr d0, [x6, x7, lsl #3]
  ldr d0, [x6, x7, lsl #3]
  ldr d0, [x6, x7, lsl #3]
  ldr d0, [x6, x7, lsl #3]
  ldr d0, [x6, x7, lsl #3]
  ldr d0, [x6, x7, lsl #3]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 07 | 0a | 0b | 18 | 19 | 1e | 3f | 4f | 50 | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9f | a0 | a5 | a6 | a7 | a8 | ac | af | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020526874200110000266920025801001008000010080015500117234802668226707267071663561665980115200800242001600482670726707118020110099100100800008000010080000080000008180000011151182161126704800001002670826708267082670826708
8020426782200110000266920025801001008000010080015500118236802668226707267071663561665980115200800242001600482670726707118020110099100100800008000010080000080000006080000011151181161126704800001002670826708267082670826708
802042676520111000026692002580100100800001008001450011670550266822670726707166356166598011520080024200160048267072670711802011009910010080000800001008000008000001680000011151181161126704800001002670826708267082670826708
80204268802001100002669200258010010080000100800155001167864026682267072670716635616659801152008002420016004826707267071180201100991001008000080000100800000800000015680000011151181161126704800001002670826708267082670826708
802042685320111000026699002580100100800001008001550011674920269562671126719166356166598011520080024200160048267112730411802011009910010080000800001008052208052201315980000011151421501126712800001002672326723267192672326713
802042670720011000026692202580100100800001008001550011675400266822670726707166356166598011520080024200160048267072671111802011009910010080000800001008000008000000080000011151181161126708800001002670826708267082670826708
8020426707200110000266920025801001008000010080015500117443402668726711267071663561665980115200800242001600482671026707118020110099100100800008000010080000080000001580000011151181161126704800001002670826710267082670826708
80204267212001100012266920025801001008000010080015500116659612668626707267071663561665980115200800242001600482670726707118020110099100100800008000010080000080000002480000011151181161126704800001002670826708267082670826708
80204267582011100002669200258010010080000100800165001166432026682267072670716635616659801152008002420016004826707267071180201100991001008000080000100800000800000010580000011151181161126704800001002670826708267082670826708
8020426707200110000266920025801001008000010080016500116731602668226707267071663561665980115200800242001600482670726707118020110099100100800008000010080000080000005780000011151181161126704800001002670826708267082670826708

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3351

retire (01) | cycle (02) | 03 | 07 | 09 | 18 | 19 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9f | a0 | a6 | a8 | ac | af | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80025268772000000026693025800101080000108000050116668826683267082670816652316688800102080000201600002670826708118002110910108000080000108000080000066800005020616762670580000102670926709267092670926709
800242674520100000266930258001010800001080000501166647266832670826708166523166888001020800002016000026708267081180021109101080000800001080000800000108800005020616652670580000102670926709267092670926709
800242681620000000266930258001010800001080000501166266266832670826708166523166888001020800002016000026708267081180021109101080000800001080000800000111800005020516562670580000102670926709267092670926709
800242681720100000266930258001010800001080000501174993266832670826708166523166888001020800002016000026708267081180021109101080000800001080000800000102800005020516772670580000102670926709267092670926709
80024268192000000026693025800101080000108000050115745226683267082670816652316688800102080000201600002671026708118002110910108000080000108000080000060800005020516552670580000102672826709267092670926709
800242670820001000266930258001010800001080000501173217266832670826708166523166888001020800002016000026708267081180021109101080000800001080000800000126800005020816882670580000102670926711267092670926709
8002426840201000002669302580010108000010800005011728342668326708267081665231668880010208000020160000267082670811800211091010800008000010800008000009800005020716782670580000102670926709267092670926709
80024267712000000026693025800101080000108000050116603226685267082670816652316688800102080000201600002670826708118002110910108000080000108000080000054800005020516662670580000102670926709267092670926709
8002426708200000002669302580010108000010800005011667502668326708267081665231668880010208000020160000267082670811800211091010800008000010800008000003801305020816652670580000102670926709267092670926709
800242684420000000266930258001010800001080000501173570266832670826708166523166888001020800002016000026708267081180021109101080000800001080000800000111800005020816862670580000102670926709267092670926709