Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (register, 64-bit)

Test 1: uops

Code:

  ldr x0, [x6, x7]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100540330004500138321119251000100010001498903983982213256100010002000398771110011000100011000043103803810386139447321611395141071000399399399399399
100439820004400138301116251000100010001527413983982213252100010002000394771110011000100001000043103803810386139447311611395141471000395399399395399
100439430114500138301119251000100010001526714013982213258100010002000398771110011000100001000043103803810386139447311611395141471000399399399399399
1004398311145101383211219251000100010001527413983982213256100010002000398771110011000100001000043103803810386139447311611395141041000399399399399399
100439830004410138321119251000100010001520803983982213256100010002000398771110011000100001000043103903810386138447311611391101041000395395395395395
100439430004400138321119251000100010001501813983982213256100010002000398771110011000100001000044103943810386138447311611395141471000399399399399399
100439830004400138331119251000100010001526713983982213256100010002000398771110011000100001000044103903810396138447311611395141041000399399399399399
100439830004400138321119251000100010001520813983982213256100010002000398851110011000100001000044103803810396139437311611395101071000399399399399399
100439830004410138321119251000100010001520803983982213256100010002000398771110011000100001000043103803810386138447311611395141471000399399399399406
100439430004400138331119251000100010001526713983982213256100010002000398771110011000100001000144103804110386138447311611391101071000395399399399395

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldr x0, [x6, x7]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40205700475241111070032697355970925401043010310001301001000061601533414701496697070035700356464303649534010030200100006020020000700473511402011009910010000301001000001001000011000000100001110261027111697983000396610000301007003670051700487005170051
40204700475251111070032697645970625401043010010001301001000061601533420620496697470050700476464603649504010030200100006020020000700513511402011009910010000301001000001001000011000000100001011261017111697983000396010000301007005170048700487004870051
40204700355250001070032697815970925401003010310001301001000061617533420621496697070047700476463103649534010030200100006020020000700473511402011009910010000301001000001001000011000030100000010261017111698103000066910000301007004870036700517004870036
40204700505240011070035697645970925401043010310000301001000061600533422061496697070047700476463103649534010030200100006020020000700353511402011009910010000301001000001001000011000000100000010261017111698103000366010000301007003670048700367005170036
40204700935250011070035697355970925401043010310001301001000061600533422061496695570047700356464603649384010030200100006020020000700503511402011009910010000301001000001001000011000013100001010261017111698103000399910000301007004870048700367003670051
40204700355250011070035697355969525401003010010000301001000061600533420621496695670047700356463103649504010030200100006020020000700473511402011009910010000301001000001001000001000000100001010261017111698133000060610000301007005670051700487004870051
40204700475250000070035697815970925401043010310001301001000061617533422061496696770053700476463103649504010030200100006020020000700353511402011009910010000301001000001001000001000000100001010261017111698133000306910000301007003670051700517005170051
40204700505250011070032697815970925401043010310000301001000061600533414701496695570050700356463103649534010030200100006020020000700353511402011009910010000301001000001001000011000000100001100261017111698133000090910000301007005170051700487003670051
40204700355240010070035697815970925401043010310000301001000061601533414701496697070035700506464303649534010030200100006020020000700503511402011009910010000301001000001001000011000000100000000261017111697983000399610000301007004870036700487005170051
40204700475240010070035697355970925401043010310000301001000061601533420621496697070047700506464303649384010030200100006020020000700473511402011009910010000301001000001001000011000000100001000261017111698133000306910000301007005170051700517005170051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0060

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400257005152411100002010070045697845971925400183001610002300101000061699533426861149669777006070060646593649824001030020100006002020000700603511400211091010000300101000010100013110002001110000111102520971926982330006130010000300107004270042700617004270042
400247006052410100003500007004569784597192540018300161000130010100006170903341769214966977700417004164659364985400103002010000600202000070060351140021109101000030010100001010002201000100111000001111252047145698043000613101310000300107006570060700427005870059
40024700575251011000200007004269781597012540014300161000230010100006169953342686114967182700417006064675364966400103002010000600202000070060351140021109101000030010100001010001201000100041001211010252047154698233000600010000300107006170042700587006170042
400247004152511100002010170045697845971925400183001610001300101000061707233426861149669777004170057646623649874001030020100006002020000700573511400211091010000300101000010100012110001001410000111102520571646980430006010010000300107006170058700427004270042
400247005852511110002000070045697845971925400183001610002300101000061707233426861149669777005770041646753649664001030020100006002020000700413511400211091010000300101000010100022010002001410000111112520471356982030006013010000300107005870042700617006170319
40024700415241010100200007004569781597162540014300161000230010100006170723341769114966980700607006064678364966400103002010000600202000070060351140021109101000030010100001010001111000110211000001010252057144698203000313101010000300107006170061700617006170064
4002470060525111100020001701286970259701254001830016100013001010000617045334268621496698070041700416467836498540010300201000060020200007006135214002110910100003001010000101000110100020004100000111125206788569969300031313010000300107006270061700587005870061
4002470060525101010058810070045697025970125400183001610001300101000061707233425421149669807004170041646783649854001030020100006002020442700623511400211091010000300101000010100031110002010110000011102520471446982030003001010000300107006170058700617005870064
4002470430524100000020001700266978459701254001830016100013001010204617135334268611496696170057700416467936498240788300201000060350201087022435114002110910100003001010000101000111100080037100001111225204712469805300060131010000300107006170066700587006170066
400247006052510101002000070045697855971625400183001610002300101000061699533427341149669617006070057646753650474001030020100006002020000700413511400211091010000300101000010100024110002010710000111102520571547013330006100010000300107005870042700587015070063

Test 3: Latency 1->3 (with chain penalty)

Chain cycles: 3

Code:

  ldr x0, [x6, x7]
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0054

retire uop (01) | cycle (02) | 03 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40205700515241100100070039697855971025401043010310002301001000061617533425421496697170054700516465036495740100302001000060200200007005435114020110099100100003010010000110010000011000000310000110026102712269819300031310010000301007005570055700557005570052
40204700515241000100070020697825971325401043010310001301001000061604133423981496697170054700546465036493840100302001000060200200007005435114020110099100100003010010000110010000001000000010000110026102712269814300000101310000301007005570052700557005570055
402047005452510001000700446978559713254010030103100013010010000616014334225414966978700547005164650364954401003020010000602002000070054351140201100991001000030100100000100100000110000000100001100261027122698173000313101310000301007005570055701577003670055
402047005452400000000700396978559713254010430103100013010010000616041334239814966974700547005464650364957401003020010000602002000070054351140201100991001000030100100000100100000110000000100001100261027122698173000313131310000301007005570055700557005270055
40204700545250000101070039697855971325401043010010001301001000061604133414701496697470054700516465036495740100302001000060200200007005435114020110099100100003010010000010010000011000000010000110026102712269817300031010010000301007005570055700557003670055
4020470054525000010007002069785597132540104301001000130100100006160413342398149669747005470051646313649384010030200100006059420000700593511402011009910010000301001000001001000001100000001000111002610271226981430003010010000301007005570055700557005570055
40204700545250000100070039697855971325401043010310001301001000061617533423981496697470054700546465536493840100302001000060200200007005435114020110099100100003010010000010010000011000000010000110026102712269817300031013010000301007005570036700527005270055
4020470035525000025010700366978559710254010030103100013010010000616041334239814966971700547005464650364957401003020010000602002000070054351140201100991001000030100100000100100000110000000100001100261027122698173000313131310000301007005570055700527005570036
40204700545240000000070020697855971325401043010010001301001000061604133414701496697470035700516465036495740100302001000060200200007005435114020110099100100003010010000010010000001000000010000010026102712269817300031301310000301007005570055700527005570055
402047005452400001000700396976459710254010030103100013010010000616175334147014966974700547005464650364954401003020010000602002000070054351140201100991001000030100100000100100000110000000100001100261027122698143000310101310000301007005570055700557005570036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40025700475241011001000700366972859706254001030013100013001010000617068334206204966955700357004764665364972400103002010000600202000070035351140021109101000030010100000101000011000000010000110252157132698103000060610000300107004870051700487003670036
40024700355240000001100700326972859706254001430010100013001010000617068334206214966967700477004764665364972400103002010000600202000070047351140021109101000030010100001101000011000000010000100252137133698103000069010000300107003670036700487005170036
40024700475240000001001700356974359706254001430013100003001010000616982334147014966955700477005364665364972400103002010000600202000070035351140021109101000030010100000101000011000000010000100252147133698103000366010000300107005170048700487004870048
40024700505240000111100700326974359706254001030013100013001010000617068334206214966955700477004764665364975400103002010000600202000070047351140021109101000030010100000101000011000000010000100252137134697983000366010000300107003670036700487004870048
40024700355250000001100700346974359700254005430013100013001010000616952334147004966967700357004764665364960400103002010000600202000070047351140021109101000030010100000101000011000000010000110252137132698103000366610000300107005570059700487004870048
40024700475250000001000700326974359695254001030013100013001010000616952334206204967037700487004864665364972400103002010000600202000070122351140021109101000030010100001101000011000000010000100252137134697983000006610000300107004870048700367004870048
40024700475240000001000700326976059695254001430013100013001010000616952334147014966955700477004764653364975400103002010000600202000070035351140021109101000030010100000101000001000000010000100252137133697983000399610000300107004870048700367004870048
400247003552500000000007003269743597062540010300131000130010100006170683342062149669677004770047646533649724001030020100006002020000700473511400211091010000300101000001010003110000480010000100255437143698103000360610000300107004870048700487004870048
40024700475250000001000700326974359706254001030010100013001010000616952334147004966967700477005064653364972400103002010000600202000070047351140021109101000030010100000101000011000000310000010252137143698103000306610000300107003670036700487004870048
40024700475250001000100700326974359706254001430013100013001010000617068334206204966955700357004764665364960400103002010000600202000070047351140021109101000030010100000101000011000000010000010252137133698103000366010000300107005170036700487004870048

Test 4: throughput

Count: 8

Code:

  ldr x0, [x6, x7]
  ldr x0, [x6, x7]
  ldr x0, [x6, x7]
  ldr x0, [x6, x7]
  ldr x0, [x6, x7]
  ldr x0, [x6, x7]
  ldr x0, [x6, x7]
  ldr x0, [x6, x7]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526731200000006710226725207762580100100800001008001250011673104923634026736267141664261668880116200800242001600482671485118020110099100800001008000001008001919438005920161800406160431911115118216442673313135800001002673726737267372683226742
802042673620011100670022669937726258010010080000100800175001168046492363402671426714166636166898011520080024200160048267146411802011009910080000100800000100800192043800191012780000011943191111511841643267341300800001002673826737267372679726717
80204267272000000000012671221207625801001008000010080015500117711649236470267072672716655616659801152008002420016004826727771180201100991008000010080000010080000008003900048800396104400111511841633267240104800001002672826728267282679026741
80204267272000000000012669220002580100100800001008001450011772614923627026727267071663561667980115200800242001600482672756118020110099100800001008000001008000004380000046063800000038000111511841644267240104800001002673526732269452672826753
80204267272000000000012669220126925801001008000010080015500117711649236510267272670716655616679801132008002420016004826707771180201100991008000010080000010080000043800390103980039613943001115118416432672414107800001002672826728267282686526733
80204267272000000045100267122121271258010010080000100800155001165856492364702672726731166556166798011520080024200160048267315611802011009910080000100800000100800000438003903039800380038000111511831643267240104800001002672826733267282684426743
8020426707200000000101267120121692580100100800001008001550011671984923647026727267271665561668380115200800242001600482672777118020110099100800001008000001008000000800390303980039600000111511841622267240144800001002672826708267282685026744
8020426727200000000101266920112742580100100800001008001550011665964923647026727267271663561667980115200800242001600482672756118020110099100800001008000001008000004380039010080039603943001115118416332670414104800001002672826728267282679926736
802042672720000000451002669221212672580100100800001008001450011657424923647026707267271665561667980115200800242001600482672777118020110099100800001008000001008000004380038048042800006000001115118416442672414107800001002672826728269362674026796
8020426730200000004510126712211279258010010080000100800155001166596492364702672726727166356166798011520080024200160048267075611802011009910080000100800000100800000438000001039800396104300111511841634267240147800001002672826728267282684626733

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002526727200114500126724212016258001010800001080000501168843149236470267282672716676316688800102080000201600002673156118002110910800001080000010800000800390388003960394450208161010268011010480000102672826732267292672926709
800242672720000451012671621102580010108000010800005011671241492364702672826708166723167088001020800002016000026727771180021109108000010800000108000043800390388003961005020916710268321014780000102672826728267282673226729
80024267282000044000267122111925800101080000108000050116884314923648026708267081667631670780010208000020160000267287711800211091080000108000001080000438000000800386139050201016101026734140480000102673226732267092672826728
8002426708200004400026712210192580010108000010800005011667500492365102670826708166723167118001020800002016000026731561180021109108000010800000108000008003803880000013844502071681026805140780000102672926728267092672926709
80024267082000044100266992112192580010108000010801785011692541492362802672726727166723166888001020800002016000026708771180021109108000010800000108000043800380388003861394350201016107267341010480000102670926728267312673226732
800242672720011010126719312002580010108000010800005011672010492365102672726708166723167118001020800002016000026727771180021109108000010800000108000044800390388003861394350206167102676700780000102673226709267282673226732
8002426731200004500126693212121925800101080000108000050116884304923647026728267271667231671180010208000020160000267317711800211091080000108000001080000448003800800006139435020916710268001410780000102673226736267092673226732
800242673120000441012669320119258001010800001080000501167124049236470267282670816676316711800102080000201600002672877118002110910800001080000010800004380039039800396139435020916108267981010480000102673226728267092672926709
8002426708200004410126716011219258001010800001080000501167201049236510267082670816676316711800102080000201600002672777118002110910800001080000010800004380039038800006139445020111610102684000480000102672926729267282672826732
800242673120010450002672201211625800101080000108000050116675004923651026708267311665231670880010208000020160000267285611800211091080000108000001080000080039039800390004450209161111267971414480000102673226728267092673226729