Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRSB (register, uxtw, 32-bit)

Test 1: uops

Code:

  ldrsb w0, [x6, w7, uxtw]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005399311101041361013742181811251000100010001483813993992223257100010002000399821110011000100011019204210571059103861574218173116113969921000400400400399400
100439931111106501033842181816251000100010001536013983992213257100010002000399811110011000100011019204210570259103861564219073116113959921000399400400400399
100439931100106401033842181816251000100010001537513993992213256100010002000399821110011000100001021224110581159103861574219073116113959921000399400400400400
100439921110006501023842181816251000100010001531813993992213257100010002000399811110011000100001019214210571159103861574119073116113969921000400400399400400
100439921000006501033832181816251000100010001536213983992213257100010002000399811110011000100001019214210571059103861574219073116113959921000399400399400400
100439921000006501023831181816251000100010001536214423982213257100010002000399811110011000100001020214210570059103861574219273116113969921000399399400400400
100439931010006501023842181815251000100010001535713993992223256100010002000398811110011000100001020204210570159103861574219073116113959921000399400400400400
100439931100006500033842181815251000100010001533413984002213257100010002000398841110011000100001019214210571059103861564219173116113969921000400399400399400
100439921010006501033842181816251000100010001536213993992213257100010002000399821110011000100001019204210570159103861574219173116113969921000400399400400400
100439931010006501033842181816251000100010001531813993992283257100010002000399811110011000100001020204210571259103861574219173116113969921000400399400399399

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrsb w0, [x6, w7, uxtw]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0051

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0f | 1e | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4020570056524100110700366978259695254010430103100013010010000616041334147049669550700517003564650364954401003020010000602002000070051351140201100991001000030100100000100100000110000000100001100261001711169814300031001310000301007005270052700957005270055
4020470035525000010700206978259710254010430103100003010010000616014334225449669710700517005164631364938401003020010000602002000070051351140201100991001000030100100001100100000110000000100000100261001711169814300031001010000301007005270036700527005270036
4020470051524000110700206978259710254010030103100003010010000616014334225449669710700357003564647364957401003020010000602002000070051351140201100991001000030100100000100100000010000003100001100261001711169818300030101310000301007005270052700527003770052
40204700515250003007002069783597102540104301031000130100100006160143342254496697107005170035646473649384010030200100006020020000700723511402011009910010000301001000001001000001100000031000010002610017111698743000310131010000301007005270036700557005270151
4020470051525000110701226976459710254010030100100013010010000616175334239849669560700357003564714364954401003020010000602002000070137351140201100991001000030100100000100100000110000000100001100261001711169814300031301010000301007005270052700367003670052
40204700795240001017003669764596952540104301001000030100100006160143341470496697407005170035646473649384031030200100006020020000701423511402011009910010000301001000001001000001100030001000001002610027111698143000300010000301007005570052700527003670036
4020470051524000100700206978259710254010430103100013010010000616175334151849669710700357005164631364938401003020010000602002000070084351140201100991001000030100100000100100000110000000100000100261011711169814300001001310000301007005570036700557003670093
4020470051525000100700776978559710254010430100100013010010000616014334225449669550700517003564647364938401003020010000602002000070051351140201100991001000030100100000100100000110000000100000100261001711169798300030101010000301007003670055700527005570052
4020470035524000101700206976459695254010430103100013010010000616014334225449669710700517005164647364957401003020010000602002000070051351140201100991001000030100100000100100000110000000100001100261001711169798300030131010000301007005270095700367005270036
40204700535240001017042669812597132540104301031000330562100516184463341566496741107055870573647973649544050130200100006020020000700513511402011009910010000301001000001001000001100000031000001002634018711698173002910131310000301007005570052700367003670036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002570047524001101070032697285970625400143001010002300101000061706833420620496695570047700476465336497640010300201000060020200007005035114002110910100003001010000010100000110000000100001102520017121698103000000610000300107004870051700367004870048
4002470035525000101070032697285969525400143001310001300101000061698233414700496696770047700476466536506240010300201000060020200007003535114002110910100003001010000010100000110000000100001102520017111698133000300610000300107003670048700487003670048
4002470035524000000070035697605970625400143001310001300101000061695233414700496697070050700506466536503240010300201000060020200007003535114002110910100003001010000010100000110000100100001102520017111698133001260010000300107004870048700487004870036
4002470047525000001070020697285970625400103001010000301611000061695233422060496697070035700476466536500540010300201000060020200007004735114002110910100003001010000010100000010000000100001102520011712698103000306610000300107004870048700487004870036
4002470035524001101070020697285970625400103001010001300101000061695233420620496696770035700476465336497240010300201000060020200007004735114002110910100003001010000010100000010000000100000002520017111698133000306010000300107003670048700487003670048
400247004752500034901070020697435969525400143001310001300101000061695233414700496696770047700476465336497240010300201000060020200007004735114002110910100003001010000010100000010000000100000102520017111698103000366010000300107003670048700517005170048
40024700475240004301070020697435969525400143001010001300101000061695233420620496695570047700476466536497240010300201000060020200007004735114002110910100003001010000010100000110000000100001102520017111697983000396610000300107005170048700487003670048
400247003552400024010700326972859706254001430013100013001010000617068334206204966955700477004764665364975400103002010000600202000070047351140021109101000030010100000101000001100004001000011025200110511698003000306610000300107031770053700487005470050
4002470047524010283521070020697315975325400143001010000300101000061709533414700496696770050700476470436497440010300201000060020200007003535114002110910100003001010000010100000010000406100001102520017111698133000306910000300107003670048700487004870036
400247003552401130001070035697285970925400143001310001300101000061695233414700496696770047700496466736497240010300201000060020200007003535114002110910100003001010000110100000110000000100001102520017121698133000060610000300107003670036700367004870316

Test 3: Latency 1->3 (with chain penalty)

Chain cycles: 3

Code:

  ldrsb w0, [x6, w7, uxtw]
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402057004752500117003569764596952540104301001000230100100006161753342206049669557005070050646463649534010030200100006020020000700353511402011009910010000301001000011001000011000000100001100261017111698103000300610000301007004870048700517004870036
402047004752400107003269735596952540104301001000130100100006160153342062149669557003570035646313649644010030200100006020020000700353511402011009910010000301001000011001000011000003100001100261017111698213000306010000301007003670048700487003670048
402047003852500107002069764597062540104301031000130100100516186243345259049669677004770047646313649504010030200100006020020000700473511402011009910010000301001000001001000011000010100000100261017111698133000366610000301007003670048700487004870048
402047004752400107003269735596952540100301001000130100100006161753342062049669557004770047646433649504010030200100006020020000700473511402011009910010000301001000001001000001000000100000000261017111698103000366010000301007003670036700517005170036
402047003552500107003269764597062540100301031000130100100006160153342062049669557003570047646463649534010030200100006020020000700473511402011009910010000301001000001001000001000013100001100261017111698103000306910000301007004970051700487004870048
402047004752400117003269735596952540104301001000030100100006160153342062049669677004770047646313649504010030200100006020020000700503511402011009910010000301001000001001000001000000100001100261017111698133000360610000301007003670036700367005170051
4020470035525001007003269735597062540104301031000130100100006160153342062049669677005070047646433649504010030200100006020020000700353511402011009910010000301001000001001000001000000100000000261017111698133000309910000301007003670036700487004870051
402047005052500107002069735597062540104301001000030100100006160153342062049669717003870035646313649534010030200100006020020000700473511402011009910010000301001000011001000011000000100000100261017111698133000300910000301007003670048700517005170048
402047005052500117003269735597092540104301031000130100100006160053341470049669707005070035646463649384010030200100006020020000700503511402011009910010000301001000001001000011000000100001000261017111698103000366610000301007004870048700487003670048
402047003552401607003569781597062540100301031000130100100006160053341470149639377005070050646433649534010030200100006020020000700503511402011009910010000301001000001001000001000000100001110261017111698103001200610000301007005170048700517004870048

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0054

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400257014354301101000700396979159715254001030013100013001010000616991334225404966971070054700546467203649794001030020100006002020000700543511400211091010000300101000010100001100000010000101025203714269798300030101310000300107005570055701367008570036
4002470035524000000007003669775597252540014300131000130010100006170683342398049669710700357005164669036497940010300201000060020200007003535114002110910100003001010000101000001000010100001010252027144698143000010101310000300107003670036700527014570055
400247005152400001010700396977559763254001430010100003001010000616991334239804966974070051700546467203649794001030020100006002020000700543511400211091010000300101000010100000100000010000101025203712269817300001301310000300107005570055700527013470055
4002470054524000010107003969775597212540014300131000130010100006169913342254049669550700517005164669036497940010300201000060020200007005435114002110910100003001010000101000001000000100001010252027144698143000313131310000300107003670055700527012970036
400247003552400001000700206977559766254001430013100013001010000617018334239804966955070035700546467203649794001030020100006002020000700353511400211091010000300101000010100001100000010000100025203712269817300001313010000300107005570036700527015970055
400247005152500002000700206981259713254001430013100013001010000617018334239804966977070054700546467273649794001030020100006002020000700543511400211091010000300101000010100000100000010000000025204712269817300030131010000300107003670036700527014170036
4002470054524000010107002069743597602540014300131000030010100006170183342398049669740700547003564653036497940010302151000060020200007005135114002110910100003001010000101000011000000100001010252047142698173000310131310000300107005270055700527013070055
4002470035524000010007002069775597432540014300131000130010100006170183341470049669710700357005464672036497940010300201000060020200007005435114002110910100003001010000101000001000000100001010252027143698173000313101310000300107005570055700527015270036
400247003552400006000700396977559738254001430013100013001010000617018334239804966974070054700546465303649794001030020100006002020000700543511400211091010000300101000010100000100001010000101025204713469817300031010010000300107014470055700837005270052
4002470051524000010007002069775597992540014300101000030010100006170183342398049669740700547003564672036497940010300201000060020200007005935114002110910100003001010000101000011000000100001010252027122698173000313131310000300107005570055700527012770055

Test 4: throughput

Count: 8

Code:

  ldrsb w0, [x6, w7, uxtw]
  ldrsb w0, [x6, w7, uxtw]
  ldrsb w0, [x6, w7, uxtw]
  ldrsb w0, [x6, w7, uxtw]
  ldrsb w0, [x6, w7, uxtw]
  ldrsb w0, [x6, w7, uxtw]
  ldrsb w0, [x6, w7, uxtw]
  ldrsb w0, [x6, w7, uxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802052672220001041101267161703017258010010080000100800165001172500492365226731267161666261668380114200800242001600482681571118020110099100800001008000011008000039080000035800356103911151180160026704664800001002672326723267082672326728
802042672220001041101267122181812258010010080000100800155001166596492362726722267221665061665980115200800242001600482672771118020110099100800001008000001008000039080000108003961353911151180160026722664800001002672326723267232672326708
80204267072000004100126707001811258010010080000100800155001167875492364226707267221663561667480114200800242001600482672356118020110099100800001008000001008000000800350358000061353911151180160026719602800001002672326723267232672326723
8020426727200000411012670721818122580100100800001008001450011665964923642267222670716635616674801142008002420016004826726711180201100991008000010080000010080134390800350398003500353911151180160026719660800001002672326723267232672826728
80204267222000114110126707201202580100100800001008001450011598144923642267072672216650616674801142008002420016004826861811180201100991008000010080000010080000580800511548005461544311151180160026704662800001002672326723267232670826728
8020426722200000450012670721818122580100100800001008001550011598144923642267222672216650616674801142008002420016004826724721180201100991008000010080000010080000390800350358003561354311151180160026724662800001002672326723267232672326723
802042670720000000012670721818112580100100800001008001450011678754923627267222672216650616659801142008002420016004826707711180201100991008000010080000010080000390800350428003561353911151180160026719660800001002670826723267082672326723
8020426707200011010126707201212258010010080000100800155001166596492364226707267071665061667480114200800242001600482672771118020110099100800001008000011008000039080035135800006004311151180160026704662800001002672326723267232672326723
80204267072000000102267070120122580100100800001008001450011673034923642267222672716650616674801152008002420016004826722711180201100991008000010080000110080000390800000358003561353911151180160026728663800001002673226733267332673326732
802042673120000025100267072180122580100100800001008001650011658564923642267222672216650616659801142008002420016004826707711180201100991008000010080000010080000390800350358003561353911151180160026719062800001002672626724267262672326726

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800252672720000101100001012671221271272580010108000010800005011688434923628267272672716822316707800102080000201600002670877118002110901080000108000001080000043800390003980000613944005020217042267281410480000102671726730267392673326709
8002426728200000000000010126693212020258001010800001080000501167907492365626737267361668231669580010208000020160000267148511800211090108000010800000108000004380038000388003961394300502041604226724010780000102673226730267402674726729
8002426731200000000004501012669320721258001010800001080000501165304492365626715267141668131671680010208000020160000267378511800211090108000010800000108000004380039000080000613844005020416024267251010480000102684026748268262675026737
800242673620011110000210000267213772125800101080000108000050116688649236482672726729166763167118001020800002016000026708771180021109010800001080000010800201943800580006080040611943191502041604226734013580000102684226743267862671926728
800242673120000000000450100267122120262580010108000010800005011668964923651267272670816676316711800102080000201600002672756118002110901080000108000001080000043800390004280039613944005020216034267281010480000102685026742267372730826719
8002426737200101100006700022672130716825800101080000108000050116720149236512670826727166723167078001020800002016000026731771180021109010800001080000010800201943800580016080040605843190502021604426733130580000102673826852267102673926732
80024267282000000000045010126712217202580010108000010800005011672984923635267362673616659316695800102080000201600002673711311800211090108000010800000108000004380039000398003960043005020416042267251410080000102675326739267512675026738
8002426737201100100006600032672137716258001010800001080000501168843492364726728267271667231668880010208000020160000267287711800211090108000010800000108002120438005901061800406159431935020316044267331313580000102673726863267382673126732
80024268672000010001021088001268742127175258001010800001080000501167219492363526719267141666031671680010208000020160000267198511800211090108000010800000108000004380000020428016961394300502042404226728010080000102673726737267682674126746
800242673720111110000670103267000771825800101080000108000050116675049236512673126731166523167088001020800002016000026728771180021109010800001080000010800211945800600016180041615901915020216042267341313580000102678826749267332680426738