Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRSB (register, 32-bit)

Test 1: uops

Code:

  ldrsb w0, [x6, x7]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | inst int load (95) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10054033111100211023883770251000100010001548014034032253261100010002000404851110010100010001020194310581016110406158431907311611400131351000404404404382404
100440321111006610236720718251000100010001550114034032253261100010002000403851110010100010001019194310591026110406119431907311611400131351000404404404404404
100440231111016710238737919251000100010001453314024032253261100010002000403851110010100010001020204310590026010396059431917311611400131351000404404404404403
100440331111006710038737021251000100010001555514033822253261100010002000403851110010100010001020204310590016110390159431907311611379131351000404383404404404
10044023111000670033883071925100010001000155551402403225326110001000200040385111001010001000102020431059100601040615801917311611400131351000404404404404404
100440631100006710239239021251000100010001548014033812253261100010002000402851110010100010001020194310190016010000019431907311611378131351000404382404404404
100440231111002110338837720251000100010001559714034032043260100010002000403861110011100010001021204310581016110406158431917311611400131351000403404404382404
100440331111006710038827018251000100010001550114064032263260100010002000403861110010100010001020194310590016010406158431917311611400131351000404404404404404
100440331111006710338837719251000100010001555914034032253261100010002000403851110010100010001019214310591012110406158431917311611400131351000404404383403404
10044032111000671023883070251000100010001552613814032253261100010002000403851110010100010001022204310590012110396159451927311611400131351000382406382404382

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrsb w0, [x6, x7]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0054

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402057005152400000010107002069790597132540104301001000130100100006160413341470149669740700547005464650364957401003020010000602002000070051351140201100991001000030100100000100100000110000031000311026101711169814300000101310000301007005570055700557003670036
4020470054525000000100070020697825971325401043010010001301001000061604133423980496698007005370054646503649574010030200100006020020000700353511402011009910010000301001000001001000001100000010000100261017111698173000013101310000301007005570036700367005570036
4020470054524000000100070039697645969525401003010310001301001000061604133423980496697107005470054646503649574010030200100006020020000700353511402011009910010000301001000001001000001100000010000110261017111698143000013131310000301007005570055700367005570055
402047005452500000010007002069785597132540100301031000130100100006160413341470149669710700547005464650364957401003020010000602002000070054351140201100991001000030100100000100100000110000001000010026101711169817300030131310000301007003670052700527005570036
40204700515250000000010700396978559713254010430103100013010010000616175334239814966971070054700546465036493840100302001000060200200007005435114020110099100100003010010000010010000011000000100000002610171116981730003130010000301007003670055700557005270055
402047005452500000010107003669764597132540100301031000130100100006160413342398149669550700547003564650364957401003020010000602002000070054351140201100991001000030100100000100100000110000001000011026101711169814300030131310000301007005270052700557005270055
40204700355240000001000700366976459713254010430100100013010010000616041334239814966978070054700546463136495740100302001000060200200007003535114020110099100100003010010000010010000011000010100001102610171116979830000001310000301007005570055700557006070036
40204700545250000001000700366978559713254010430103100013010010000616014334225414966974070035700356465036495740100302001000060200200007005435114020110099100100003010010000010010000011000000100001102610171116981730003130010000301007005570055700367003670055
402047003552400000010007002069785596952540104301031000130100100006161753342398049669710700547005464631364954401003020010000602002000070051351140201100991001000030100100000100100000110000001000010026101711169817300031301310000301007005570036700557005570036
4020470051525000100101070039697645971325401043010310001301001000061604133414700496697407003570054646313649574010030200100006020020000700543511402011009910010000301001000011001000001100000010000110261017111698173000313131310000301007005270055700367005570055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3f | 43 | 49 | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40025700475251000600070032006976059784254001430013100013001010000616952334147014966967700477004764665364975400103002010000600202000070035351140021109101000030010100000101000011000030100001010252067142698103000390610000300107003670036700527013170048
40024700505240100101070032006972859725254005830013100013001010000616982334147014966970700507003564653364972400103002010000600202000070266351140021109101000030010100000101000011000000100001010252047144698103001266610000300107005170048700487004870048
40024700355250000101070038006972859792254001430013100013001010000616952334147014966970700507004764665364960400103002010000600202000070047351140021109101000030010100000101000011000000100001010252047144698133000360910000300107004870048700487005470048
40024700505250000100070032006974359822254001830016100013001010000617027334244614966975700557005564673364980400103002010000600202000070055351140021109101000030010100000101000021000000100001010252027143698103000366910000300107004870036700367005470036
40024700475250100100070020006976059752254001430013100013001010000616952334147014966955700477003564665364972400103002010000600202000070035351140021109101000030010100000101000011000030100001010252047124697983000096610000300107003670048700487005470048
40024700765250000100070032006974359794254001430013100013001010000616952334206214966967700477003564665364972400103002010000600202000070047351140021109101000030010100000101000011000000100001010252047143698103000369610000300107003670048700487005470051
400247005052500001010700320069743597681234001430010100013001010000616952334206214966955700357003564665364975400103002010000600202000070151351140021109101000030010100000101000011000000100001010252027144698103000399610000300107005170048701447014670048
40024700475240000100070032006976059808254001430013100013001010000616982334147014966955700477004764665364960400103002010000600202000070047351140021109101000030010100000101000011000101100011110252027143698183000666610000300107005670056700567006270051
40024700475250000100070035006972859830254001430013100013001010000617068334206214966967700477003564653364975400103002010000600202000070050351140021109101000030010100000101000011000000100001010252047142697983000300910000300107005170051700527007470048
40024700475240000600070035006972859823254001430013100013001010000617068334147014966967700477004764665364972400103002010000600202000070047351140021109101000030010100000101000011000000100000010252027144698103000366610000300107003670036700487004270040

Test 3: Latency 1->3 (with chain penalty)

Chain cycles: 3

Code:

  ldrsb w0, [x6, x7]
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402057004852500101110100700386978459712254010830106100023010010000616032334235004966967700477004764643364938401003020010000602002000070047351140201100991001000030100100001100100000010000000310000101000261017111698283000306010000301007004270054700547005470042
402047005352411010010000700326973559706254010430103100013010010000616015334147004966967700477004764643364938401003020010000602002000070047351140201100991001000030100100000100100000110000000010000011100261017111698163000666610000301007005470054700547005470054
402047005352410000100000700326973559695254010430100100013010010000616015334147004966967700477004764643364950401003020010000602002000070047352140201100991001000030100100000100100000110000000010000101000261017111697983000060610000301007004870048700487003670048
402047004752400000020001700386978459712254010430106100023010010057616545334287804966967700357004764643364938401003020010000602002000070052351140201100991001000030100100000100100000110000000310000101000261017111698103000306610000301007004870048700487004870036
402047004752400000060100700386978459712254010830106100023010010000616032334176904966967700477004764643364950401003020010000602002000070035351140201100991001000030100100000100100000110000000010000111110261017111698163000666610000301007005470054700547005470042
402047005352511010010000700206973559706254010430103100003010010000616015334206204966967700477004764631364950401003020010000602002000070047351140201100991001000030100100000100100000010000000010000111100261017111698163000666610000301007005470042700547005470054
402047005352510010110100700326973559706254010430103100013010010000616015334147004966967700477004764643364950401003020010000602002000070047351140201100991001000030100100000100100000010000030010000111100261017111698163000666610000301007005470054700547005470054
4020470053524100000280100700326973559706254010430103100013010010000616015334279404966973700537004164643364944401003020010000602002000070053351140201100991001000030100100000100100013110002001110000000000261017111698103000066610000301007004870048700487003670048
402047004752500000000100700326973559695254010430103100013010010000616175334206204966973700537005364649364956401003020010000602002000070041351140201100991001000030100100000100100023110001002110000101000261017111697983000360010000301007003670048700487005770048
402047003552400000120001700386978459712254010830106100023010010000616032334235004966955700477003564631364950401003020010000602002000070047351140201100991001000030100100000100100000010000000010000110110261017111698103000366610000301007004870048700487003670052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0050

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002570047525101011001600037003269748598732540014300131000130010100006170683342206496696970048700476465336497540010300201000060020200007005035114002110910100003001010000010100000010000000100001000252615711616698133000366910000300107008070078700957005170048
400247004752410100000101037003269777597062540014300101000130010100006169823341470496697070035700356466536497240010300201000060020200007041235114002110910100003001010000010100000110000420100000100252616951416697983000306910000300107010270091700767005070051
40024700355251010000057400037003570008597062540010300101000030010100006169523341470496697070050700506465336497540010306771000060020200007005035114002110910100003001010000010100000110000000100001100252617711515697983000300910000300107005370101700567005170051
400247005052510110000000037003369761597522540014300101000130010100006169913341614496697070050700476465436497540010300201011060020200007005035114002110910100003001010000110100000110000000100001100252616711415698103000390910000300107005270054700487003670051
400247003552510100040600037002069773598632540014300101000030010100006169823341470496697070050700506467436496040010300201000060020200007005035114002110910100003001010000010100000110000003100001010252615711415698103000366010000300107010470080701207005170051
400247005052510100000600037002069800597062540014300101000130010100006169823342302496696770047700356466536497540010303471005560020200007004735114002110910100003001010000010100000010000100100000100252616711615697983000390010000300107009470066700587005470048
400247005052510100000101037003569774597062540061300101000030010100006169823342206496723970050700506466836496040010300201000060020200007005035114002110910100003001010000010100000110000000100001100252617711115698133000090610000300107009170069700627005170051
400247004752410100000100037003569778597062540014300131000130010100006170183352586496697070047700356467036497540010300201000060020200007005035114002110910100003001010000010100000110000000100001100252612711516698733000360610000300107009570096701247005170055
400247008752411100000101037003569860597062540014300131000130010100006169523341470496697070050700506465336497540010300201000060344200007005035114002110910100003001010000110100000110000000100001000252615711416698133000390910000300107005170056700367003670036
400247005052510101000600037003269801597092540014300131000130010100006169823342206496696770047700506465336496040010300201000060020200007005035114002110910100003001010000010100030110000000100001100252617712218698103000369610000300107009470079700807005170051

Test 4: throughput

Count: 8

Code:

  ldrsb w0, [x6, x7]
  ldrsb w0, [x6, x7]
  ldrsb w0, [x6, x7]
  ldrsb w0, [x6, x7]
  ldrsb w0, [x6, x7]
  ldrsb w0, [x6, x7]
  ldrsb w0, [x6, x7]
  ldrsb w0, [x6, x7]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526738200000045101267122121216258010010080000100800145001167303492364702672726727166556166798011520080024200160048267277711802011009910080000100800000100800004380000039800396139431115118016002672410104800001002672826728267282670826708
802042672720000004510026712211216258010010080000100800145001167303492364702672726727166556166798011520080024200160048267277711802011009910080000100800000100800004380039039800396039431115118016002672410104800001002672826728267282672826728
802042670720000004510126692212122825801001008000010080015500117711649236470267072672716655616679801152008002420016004826727771180201100991008000010080000010080000438003904280039613901115118016002672410104800001002672826728267282672826728
8020426727200000001012671221212182580100100801301008001550011668114923637026730267111663561667980116200800242001600482672777118020110099100800001008000011008000043800390398003960390111511801600267241004800001002672826728267282672826728
8020426727201010045101267122120162580100100800001008001550011771164923627026727267271665561665980115200800242001600482672756118020110099100800001008000001008000043800390398003961043111511801600267241000800001002670826708267082672826708
802042672720000004510026712212121625801001008000010080015500116730349236470267072672716655616679801152008002420016004826727771180201100991008000010080000010080000080039039800396139431115118016002672610104800001002672826728267282672826728
80204267272000000450012669201212162580100100800001008001550011771164923647026727267271665561667980115200800242001600482672777118020110099100800001008000001008000043800390398000061394311151180160026724000800001002672826728267282673226728
802042672720101004610126712212120258010010080000100800145001167198492364702672726727166596166598011520080024200160048267277711802011009910080000100800000100800004380039042800500139431115118016002672410104800001002672826728267282672826728
802042672720001004510126712212121625801001008000010080015500117711649236560267272672716655616679801162008002420016004826727771180201100991008000010080000010080000438003913800396139431115118016002672410100800001002672826708267282672826728
8020426707200000045101267122121216258010010080000100800155001167303492364702672726727166556166798011620080024200160048267277711802011009910080000100800000100800004380039039800396139431115118016002672410104800001002672826728267282672826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800252673320010100006510126717218182925800101080000108000050116696014923652267322673516677316712800102080000201600002673381118002110910800001080000010800192141800190005980038610390050206165426725106280000102674126737267262670926709
80024267282000000000411012670701801225800101080000108000055116760514923648267082672716672316688800102080000201600002670871118002110910800001080000010800000080035000080035613543005020516542671906280000102673626723267232672326729
80024267222000000000411012670721218025800101080000108000050116760514923628267082672216667316702800102080000201600002672271118002110910800001080000010800000398003900038800396100005020516452671966280000102675426723267292670926728
8002426708200000000041100267072181812258001010800001080000501167605149236422672226708166673166888001020800002016000026708711180021109108000010800000108000000800390003580035613543005020516452671960480000102672726744267292672326728
800242672220000000104110126707201815258001010800001080000501166750149236422672226708166523166888001020800002016000026722561180021109108000010800001108000003980035000080035013501605020516452672999280000102673926738267332673426715
8002426733201111000064103267172181816258001010800001080000501170204149236522671526732166773167128001020800002016000026715811180021109108000010800001108001920428005810159800380157421915020516552672999280000102673326725267342674026715
80024267142001110110661012671721801625800101080000108000050116582214923652267332673216677316712800102080000201600002673381118002110910800001080000010800191942800570002180039615701905020416452672999080000102674626716267162673426733
80024267322001000100651032671720181525800101080000108000050116759904923653267142673216677316712800102080000201600002673281118002110910800001080000110800191942800190005980038605701905020516552672990280000102673626739267332671626734
80024267332001010000210032670030181525800101080000108000050116696004923653267322673316677316712800102080000201600002673282118002110910800001080000010800202008005710159800000156421905020516552672990280000102674626733267332673326716
80024267162001000111650012671720181625800101080000108000050116752914923634267332673216677316712800102080000201600002671581118002110910800001080000110800201842800191006280037605801915020516542672999080000102673326733267332673326716