Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRSH (32-bit)

Test 1: uops

Code:

  ldrsh w0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100539431114410138321119251000100010001526703983982213256100010001000398771110011000100010004310393810386139437311611395141071000399399399399399
100439830004410138321119251000100010001503713983982213256100010001000398771110011000100010004310393810386139437311611391141471000395399395395399
10043942010440013862121216251000100010001527403943982213252100010001000397771110011000100010004310383910386139447311611395141041000399399399399395
10043943000451013832121219251000100010001527403943942163252100010001000394771110011000100010004310393910396136447311611391101041000399399399399395
100439820104410138221116251000100010001527403983942213256100010001000394771110011000100010004310383810386139447311611395141471000399399395399399
10043983010440013832121216251000100010001498903983982173256100010001000394771110011000100010004310383810396139437311611391141471000400399399395399
100439430104510138321119251000100010001520813983982213256100010001000398771110011000100010004310383810386138447311611395141471000399399399399399
1004398201045001379212119251000100010001498913983982213256100010001000398771110011000100010004310383810386139447311611391141041000399395395395399
10043943010440013832121216251000100010001498913983942163252100010001000398771110011000100010004310383810386139437311611391141471000399399399395399
10043983010451013832121216251000100010001503703983982213256100010001000398771110011000100010004310383810386139447311611395141471000399399395395399

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrsh w0, [x6]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0056

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4020570053525101110201007004169787597122540108301061000230100100006160323342494049669737005670053646373649884010030200100006020010000700563511402011009910010000301001000001001000121100020111000011110261027111698193000696910000301007006070054700547006070057
4020470056526101010101007004169784597013840104301061000230100100006160783342494049669767004170056646523649594010030200100006020010000700563511402011009910010000301001000001001000231100010211000011110261017111698193000666910000301007005770054700577005770057
4020470056525110000201007003869787597122540104301061000230100100006160593342494049669767005670053646523649594010030200100006020010000700563511402011009910010000301001000001001000221100011141000011011261017111698193000399910000301007004270042700577005770057
402047005652511000068500007004169715597162540108301061000230100100006160783342494049669617004170056646523649594010030200100006020010000700563511402011009910010000301001000001001000330100020111000011110261017111698163000306910000301007005770057700427005770057
4020470041524111000201007004169787597012540108301061000230100100006160593342494049669767005370041646373649444010030200100006020010000700563511402011009910010000301001000001001000311100020211000011010261017111698193000690910000301007004270054700427005470042
4020470041524110000100017002669702597152540108301031000230100100006160593342494049669767004170053646523649564010030200100006020010000700563511402011009910010000301001000001001000111100010011000001111261017111698043000699010000301007005770057700427004570042
4020470056525101010201007003869784597012540104301061000230100100006160783342494049669617005370041646523649594010030200100006020010000700843511402011009910010000301001000001001000120100011111000001010261017111698193000696010000301007004270042700547005770057
4020470056525111000201007004169702597122540108301031000230100100006160593341769049669767004170056646523649444010030200100006020010000700563511402011009910010000301001000001001000110100010011000011111261017111698193000699910000301007005470042700577005770057
4020470053525110000200007003869787597012540104301061000230100100006160593342494049669617005670041646373649594010030200100006020010064700613511402011009910010000301001000001001000211100021111000011110261017111698163000660910000301007005770057700577005770057
4020470056524100000100017002669787597152540104301061000230100100006160593341769049669767005670056646373649594010030200100006020010000700533511402011009910010000301001000011001000111100010011000011111261017111698193000690910000301007005770054700427005770057

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40025700635270000100013010700326995459695254001430013100013001010000616982334220600496696770047700476466836497540010300201000060020100007005035114002110910100003001010000110100001100000000100001102520017112698103000306910000300107004870051700367003670051
4002470035524000000001010700326976059695254001430013100013001010000616952334220600496696770050700356466536497540010300201000061344100007005035114002110910100003001010000110100001100000003100001002520037131697983000360610000300107004870048700487003670049
4002470047525000000001010700356974359709254001430013100013001010000616982334206200496696770055704186466836497240010300201000060020100007003535114002110910100003001010000010100001100000000100001002520017112698103000306610000300107005370048700487004870048
4002470035524000000001000700326976059709254001030013100003001010000617068334206200496697070050700506466836497240010300201000060020100007005035114002110910100003001010000010100001100000009100121002520027121698103000066610000300107004870036700487004870048
40024700505240100000010007003269728597062540010300131001230010100006169523341470004966967700477004964670364972400103002010000600201000070047351140021109101000030010100000101000001000002012100001102520017121698133000309910000300107004870036700487003670054
4002470050525000000000000700206972859695254006430010100013001010000616952334206210496697170037704166466836497540010300201000060020100007003535114002110910100003001010000110100001100000000100001102520027121698103000306610000300107003670048700367004870036
4002470035525000010001000700206976059710254001430049100003001010000616982334220600496697070047700506465336497540010300201000060020100007003535114002110910100003001010000010100000100000000100001102520017112698103000066610000300107004870048700487003670048
4002470047524000000000000700326974359695254001430013100003058410000617068334206200496696870052700356465336497240010300201000060020100007004735114002110910100003001010000010100000100000108879100001002520017123698103000306610000300107004870048700487004870051
40024700475250000000013010700326973659706254001430013100013001010000617068334206200496696770047700356466536497240010300201000060020100007003535114002110910100003001010000110100001100000000100001102520027912698133000099610000300107005170051700517005170036
4002470050524000010006010700326972859706254001430010100013001010000616952334206200496696770047700506466536496040010300201000060020100007005035114002110910100003001010000110100001100000000100001102520017112697983000399910000300107004870048700367005170036

Test 3: throughput

Count: 8

Code:

  ldrsh w0, [x6]
  ldrsh w0, [x6]
  ldrsh w0, [x6]
  ldrsh w0, [x6]
  ldrsh w0, [x6]
  ldrsh w0, [x6]
  ldrsh w0, [x6]
  ldrsh w0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 61 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802052672820000000450012671220121625801001008000010080000500117703810492365726736267361665931669580100200800002008000026736851180201100990100800001008000001008002020438005900161800406139430005110001161126724010100800001002672826728267282672826728
80204267272000000021102266990770258010010080000100800005001170073104923647267272672716630316685801002008000020080000267277711802011009901008000010080000010080000008003900039800396139430005110001161126724010104800001002672826728267282672826728
8020426727200000002100226721277192580100100800001008000050011672310049236472672726727166503166858010020080000200800002672777118020110099010080000100800000100800000438003900039800396159431810511000116112672401004800001002672826728267282672826728
8020426727200000004510126712201216258010010080000100800005001165789004923647267072672716650316685801002008000020080000267277711802011009901008000010080000010080000043800000003980039613900005110001161126724010104800001002672826728267282672826728
8020426727200000004510126712201216258010010080000100800005001177038104923647267272670716650316685801002008019020080000267275611802011009901008000010080000010080000043800390000800396139430015132001161126704010104800001002672826728267082672826728
802042672720001000210032669907720258010010080000100800005001166519104923647267272672716650316685801002008000020080000267275611802011009901008000010080000010080000043800390000800006139430005110001161126724010104800001002672826728267282670826728
8020426727200001007210126692212121725801001008000010080000500116652510492362726727267071663031668580100200800002008000026727771180201100990100800001008000001008000004380039010398003961043000511052116112672400104800001002670826708267282670826708
802042670720000000450012671221212162580100100800001008000050011657891049236342671426714166593166948010020080000200800002673785118020110099010080000100800000100800201943800581016080040013900005110001161126724010100800001002670826728267282672826708
80204267272000110067102267213000258010010080000100800005001167231054923647267272681916650316685801002008000020080000267277711802011009901008000010080000010080000008000000040800396158019105110531161126704010104800001002672826708267282670826728
80204267272000001045001267123012162580100100800001008000050011665251549236352672726727166503166858010020080000200800002672777118020110099010080000100800001100800000438000000039800396039430005110531161126724010104800001002672826708267352672826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800252672220110000022671221200258001010800001080000501166818014923647267222672816672316702800102080000208000026708711180021109108000010800001108000008003501888003560039502020169626719100280000102670926723267232672326728
8002426866200000000126712018181225800101080000108000050116941000492364226708267081666731670280010208022020800002672271118002110910800001080000010800003980039035800396135050209169626724100480000102672926729267292672926723
800242672220200041101267072012122580010108000010800005011682760149236422670826722166671216702800102080000208000026708711180021109108000010800000108000039801651358000001039502061611102671960280000102672326728267092672926709
80024267222000004510226712012120258001010800001080000501166801014923648267222670816672316702800102080000208000026727711180021109108000010800000108000039800360358003901354350205169626705100480000102670926728267282672826728
8002426722201100000126693201812258001010800001080000501166996014923642267222672216667316702800102080000208000026722711180021109108000010800000108000008000003580035610050201016982671900280000102670926723267232672326723
80024267222000000001266932181816258001010800001080000501179515014923642267082670816652316702800102080000208000026708711180021109108000010800000108000039800350358003561353950206167526705106080000102672926729267232672926723
80024267222010000102267132121202580010108000010800005011718850149236282670826722166723166888001020800002080000267285611800211091080000108000001080000398003903580000610395020916872671966280000102672326709267232672326723
800242672220200041101267072180025800101080000108000050116910101492362826722267081666731670280010208000020800002672271118002110910800001080000010800000800000191800356135395020916882671960280000102672326723267232672326723
8002426708201001410012671221218162580010108000010800005011722400149236472672826727166523167088001020800002080000267087111800211091080000108000001080000398003503880000613505020716952670506480000102672926709267292672326709
8002526899200010010226693201216258001010800001080000501167209014923642267082670816652316688800102080000208000026722711180021109108000010800000108000039800000358003561354350208161092670566080000102672326709267232672326760